{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 1,
  "global_step": 6834,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.000292654375182909,
      "grad_norm": 16.360368728637695,
      "learning_rate": 4.999268364062043e-05,
      "loss": 0.6293,
      "step": 1
    },
    {
      "epoch": 0.000585308750365818,
      "grad_norm": 10.653396606445312,
      "learning_rate": 4.998536728124086e-05,
      "loss": 0.9532,
      "step": 2
    },
    {
      "epoch": 0.000877963125548727,
      "grad_norm": 4.163839817047119,
      "learning_rate": 4.9978050921861286e-05,
      "loss": 0.2345,
      "step": 3
    },
    {
      "epoch": 0.001170617500731636,
      "grad_norm": 3.368375539779663,
      "learning_rate": 4.9970734562481714e-05,
      "loss": 0.2212,
      "step": 4
    },
    {
      "epoch": 0.0014632718759145448,
      "grad_norm": 8.007204055786133,
      "learning_rate": 4.996341820310214e-05,
      "loss": 0.3395,
      "step": 5
    },
    {
      "epoch": 0.001755926251097454,
      "grad_norm": 9.681714057922363,
      "learning_rate": 4.995610184372256e-05,
      "loss": 0.4309,
      "step": 6
    },
    {
      "epoch": 0.002048580626280363,
      "grad_norm": 4.414303779602051,
      "learning_rate": 4.994878548434299e-05,
      "loss": 0.2745,
      "step": 7
    },
    {
      "epoch": 0.002341235001463272,
      "grad_norm": 3.7951767444610596,
      "learning_rate": 4.994146912496342e-05,
      "loss": 0.1274,
      "step": 8
    },
    {
      "epoch": 0.0026338893766461808,
      "grad_norm": 7.083094120025635,
      "learning_rate": 4.993415276558385e-05,
      "loss": 0.3291,
      "step": 9
    },
    {
      "epoch": 0.0029265437518290896,
      "grad_norm": 2.265326738357544,
      "learning_rate": 4.9926836406204275e-05,
      "loss": 0.1044,
      "step": 10
    },
    {
      "epoch": 0.003219198127011999,
      "grad_norm": 2.20458984375,
      "learning_rate": 4.99195200468247e-05,
      "loss": 0.1148,
      "step": 11
    },
    {
      "epoch": 0.003511852502194908,
      "grad_norm": 2.6887764930725098,
      "learning_rate": 4.991220368744513e-05,
      "loss": 0.1535,
      "step": 12
    },
    {
      "epoch": 0.0038045068773778167,
      "grad_norm": 5.465479850769043,
      "learning_rate": 4.990488732806556e-05,
      "loss": 0.2356,
      "step": 13
    },
    {
      "epoch": 0.004097161252560726,
      "grad_norm": 2.450127601623535,
      "learning_rate": 4.989757096868599e-05,
      "loss": 0.0974,
      "step": 14
    },
    {
      "epoch": 0.004389815627743635,
      "grad_norm": 5.298760890960693,
      "learning_rate": 4.9890254609306415e-05,
      "loss": 0.2099,
      "step": 15
    },
    {
      "epoch": 0.004682470002926544,
      "grad_norm": 1.242136836051941,
      "learning_rate": 4.9882938249926836e-05,
      "loss": 0.0506,
      "step": 16
    },
    {
      "epoch": 0.004975124378109453,
      "grad_norm": 3.672600269317627,
      "learning_rate": 4.9875621890547264e-05,
      "loss": 0.1305,
      "step": 17
    },
    {
      "epoch": 0.0052677787532923615,
      "grad_norm": 3.408050298690796,
      "learning_rate": 4.986830553116769e-05,
      "loss": 0.0902,
      "step": 18
    },
    {
      "epoch": 0.00556043312847527,
      "grad_norm": 9.07597541809082,
      "learning_rate": 4.986098917178812e-05,
      "loss": 0.1501,
      "step": 19
    },
    {
      "epoch": 0.005853087503658179,
      "grad_norm": 11.2225341796875,
      "learning_rate": 4.985367281240855e-05,
      "loss": 0.1522,
      "step": 20
    },
    {
      "epoch": 0.006145741878841089,
      "grad_norm": 7.519731521606445,
      "learning_rate": 4.9846356453028976e-05,
      "loss": 0.1536,
      "step": 21
    },
    {
      "epoch": 0.006438396254023998,
      "grad_norm": 10.621881484985352,
      "learning_rate": 4.9839040093649404e-05,
      "loss": 0.2526,
      "step": 22
    },
    {
      "epoch": 0.006731050629206907,
      "grad_norm": 11.814753532409668,
      "learning_rate": 4.983172373426983e-05,
      "loss": 0.2104,
      "step": 23
    },
    {
      "epoch": 0.007023705004389816,
      "grad_norm": 2.594482421875,
      "learning_rate": 4.982440737489026e-05,
      "loss": 0.0617,
      "step": 24
    },
    {
      "epoch": 0.0073163593795727245,
      "grad_norm": 2.499378204345703,
      "learning_rate": 4.981709101551069e-05,
      "loss": 0.0613,
      "step": 25
    },
    {
      "epoch": 0.007609013754755633,
      "grad_norm": 3.145395278930664,
      "learning_rate": 4.9809774656131116e-05,
      "loss": 0.0386,
      "step": 26
    },
    {
      "epoch": 0.007901668129938543,
      "grad_norm": 4.624070167541504,
      "learning_rate": 4.980245829675154e-05,
      "loss": 0.1326,
      "step": 27
    },
    {
      "epoch": 0.008194322505121452,
      "grad_norm": 9.242585182189941,
      "learning_rate": 4.9795141937371965e-05,
      "loss": 0.154,
      "step": 28
    },
    {
      "epoch": 0.008486976880304361,
      "grad_norm": 0.6491050720214844,
      "learning_rate": 4.978782557799239e-05,
      "loss": 0.0038,
      "step": 29
    },
    {
      "epoch": 0.00877963125548727,
      "grad_norm": 4.768040180206299,
      "learning_rate": 4.978050921861282e-05,
      "loss": 0.1239,
      "step": 30
    },
    {
      "epoch": 0.009072285630670179,
      "grad_norm": 5.549623966217041,
      "learning_rate": 4.977319285923325e-05,
      "loss": 0.267,
      "step": 31
    },
    {
      "epoch": 0.009364940005853088,
      "grad_norm": 7.482322692871094,
      "learning_rate": 4.9765876499853676e-05,
      "loss": 0.2245,
      "step": 32
    },
    {
      "epoch": 0.009657594381035996,
      "grad_norm": 11.393575668334961,
      "learning_rate": 4.9758560140474104e-05,
      "loss": 0.2126,
      "step": 33
    },
    {
      "epoch": 0.009950248756218905,
      "grad_norm": 22.630184173583984,
      "learning_rate": 4.975124378109453e-05,
      "loss": 0.6228,
      "step": 34
    },
    {
      "epoch": 0.010242903131401814,
      "grad_norm": 17.684167861938477,
      "learning_rate": 4.974392742171496e-05,
      "loss": 0.4424,
      "step": 35
    },
    {
      "epoch": 0.010535557506584723,
      "grad_norm": 5.006700038909912,
      "learning_rate": 4.973661106233539e-05,
      "loss": 0.1162,
      "step": 36
    },
    {
      "epoch": 0.010828211881767632,
      "grad_norm": 8.332120895385742,
      "learning_rate": 4.9729294702955816e-05,
      "loss": 0.2701,
      "step": 37
    },
    {
      "epoch": 0.01112086625695054,
      "grad_norm": 5.902966022491455,
      "learning_rate": 4.972197834357624e-05,
      "loss": 0.1282,
      "step": 38
    },
    {
      "epoch": 0.01141352063213345,
      "grad_norm": 3.1011571884155273,
      "learning_rate": 4.9714661984196665e-05,
      "loss": 0.0795,
      "step": 39
    },
    {
      "epoch": 0.011706175007316359,
      "grad_norm": 4.028739929199219,
      "learning_rate": 4.970734562481709e-05,
      "loss": 0.1002,
      "step": 40
    },
    {
      "epoch": 0.01199882938249927,
      "grad_norm": 4.598829746246338,
      "learning_rate": 4.970002926543752e-05,
      "loss": 0.1702,
      "step": 41
    },
    {
      "epoch": 0.012291483757682178,
      "grad_norm": 10.465580940246582,
      "learning_rate": 4.969271290605795e-05,
      "loss": 0.2358,
      "step": 42
    },
    {
      "epoch": 0.012584138132865087,
      "grad_norm": 16.530763626098633,
      "learning_rate": 4.968539654667838e-05,
      "loss": 0.3765,
      "step": 43
    },
    {
      "epoch": 0.012876792508047996,
      "grad_norm": 11.63476276397705,
      "learning_rate": 4.9678080187298805e-05,
      "loss": 0.259,
      "step": 44
    },
    {
      "epoch": 0.013169446883230905,
      "grad_norm": 7.477903366088867,
      "learning_rate": 4.967076382791923e-05,
      "loss": 0.1471,
      "step": 45
    },
    {
      "epoch": 0.013462101258413814,
      "grad_norm": 1.3951044082641602,
      "learning_rate": 4.966344746853966e-05,
      "loss": 0.055,
      "step": 46
    },
    {
      "epoch": 0.013754755633596722,
      "grad_norm": 5.2689595222473145,
      "learning_rate": 4.965613110916009e-05,
      "loss": 0.0843,
      "step": 47
    },
    {
      "epoch": 0.014047410008779631,
      "grad_norm": 1.5146514177322388,
      "learning_rate": 4.964881474978051e-05,
      "loss": 0.0214,
      "step": 48
    },
    {
      "epoch": 0.01434006438396254,
      "grad_norm": 1.2873971462249756,
      "learning_rate": 4.964149839040094e-05,
      "loss": 0.0206,
      "step": 49
    },
    {
      "epoch": 0.014632718759145449,
      "grad_norm": 2.304189920425415,
      "learning_rate": 4.9634182031021366e-05,
      "loss": 0.0219,
      "step": 50
    },
    {
      "epoch": 0.014925373134328358,
      "grad_norm": 3.5825271606445312,
      "learning_rate": 4.9626865671641794e-05,
      "loss": 0.0284,
      "step": 51
    },
    {
      "epoch": 0.015218027509511267,
      "grad_norm": 7.846745491027832,
      "learning_rate": 4.961954931226222e-05,
      "loss": 0.1349,
      "step": 52
    },
    {
      "epoch": 0.015510681884694176,
      "grad_norm": 0.15582777559757233,
      "learning_rate": 4.961223295288265e-05,
      "loss": 0.0021,
      "step": 53
    },
    {
      "epoch": 0.015803336259877086,
      "grad_norm": 0.20206129550933838,
      "learning_rate": 4.960491659350308e-05,
      "loss": 0.0023,
      "step": 54
    },
    {
      "epoch": 0.016095990635059995,
      "grad_norm": 8.418569564819336,
      "learning_rate": 4.9597600234123506e-05,
      "loss": 0.0311,
      "step": 55
    },
    {
      "epoch": 0.016388645010242904,
      "grad_norm": 0.4458349347114563,
      "learning_rate": 4.9590283874743934e-05,
      "loss": 0.0015,
      "step": 56
    },
    {
      "epoch": 0.016681299385425813,
      "grad_norm": 15.10268497467041,
      "learning_rate": 4.958296751536436e-05,
      "loss": 0.1544,
      "step": 57
    },
    {
      "epoch": 0.016973953760608722,
      "grad_norm": 3.0525522232055664,
      "learning_rate": 4.957565115598479e-05,
      "loss": 0.0124,
      "step": 58
    },
    {
      "epoch": 0.01726660813579163,
      "grad_norm": 0.11207275837659836,
      "learning_rate": 4.956833479660521e-05,
      "loss": 0.0007,
      "step": 59
    },
    {
      "epoch": 0.01755926251097454,
      "grad_norm": 4.801476001739502,
      "learning_rate": 4.956101843722564e-05,
      "loss": 0.0091,
      "step": 60
    },
    {
      "epoch": 0.01785191688615745,
      "grad_norm": 0.045167919248342514,
      "learning_rate": 4.955370207784607e-05,
      "loss": 0.0004,
      "step": 61
    },
    {
      "epoch": 0.018144571261340357,
      "grad_norm": 12.306194305419922,
      "learning_rate": 4.9546385718466495e-05,
      "loss": 0.0526,
      "step": 62
    },
    {
      "epoch": 0.018437225636523266,
      "grad_norm": 0.4786720871925354,
      "learning_rate": 4.953906935908692e-05,
      "loss": 0.0019,
      "step": 63
    },
    {
      "epoch": 0.018729880011706175,
      "grad_norm": 6.309378623962402,
      "learning_rate": 4.953175299970735e-05,
      "loss": 0.1884,
      "step": 64
    },
    {
      "epoch": 0.019022534386889084,
      "grad_norm": 3.3473260402679443,
      "learning_rate": 4.952443664032778e-05,
      "loss": 0.229,
      "step": 65
    },
    {
      "epoch": 0.019315188762071993,
      "grad_norm": 11.742388725280762,
      "learning_rate": 4.9517120280948206e-05,
      "loss": 0.3438,
      "step": 66
    },
    {
      "epoch": 0.0196078431372549,
      "grad_norm": 7.495570659637451,
      "learning_rate": 4.9509803921568634e-05,
      "loss": 0.0878,
      "step": 67
    },
    {
      "epoch": 0.01990049751243781,
      "grad_norm": 0.14235353469848633,
      "learning_rate": 4.950248756218906e-05,
      "loss": 0.002,
      "step": 68
    },
    {
      "epoch": 0.02019315188762072,
      "grad_norm": 2.739047050476074,
      "learning_rate": 4.9495171202809484e-05,
      "loss": 0.0183,
      "step": 69
    },
    {
      "epoch": 0.02048580626280363,
      "grad_norm": 2.6751646995544434,
      "learning_rate": 4.948785484342991e-05,
      "loss": 0.0417,
      "step": 70
    },
    {
      "epoch": 0.020778460637986537,
      "grad_norm": 4.878410339355469,
      "learning_rate": 4.948053848405034e-05,
      "loss": 0.0622,
      "step": 71
    },
    {
      "epoch": 0.021071115013169446,
      "grad_norm": 3.525151014328003,
      "learning_rate": 4.947322212467077e-05,
      "loss": 0.0839,
      "step": 72
    },
    {
      "epoch": 0.021363769388352355,
      "grad_norm": 0.149206280708313,
      "learning_rate": 4.9465905765291195e-05,
      "loss": 0.0018,
      "step": 73
    },
    {
      "epoch": 0.021656423763535264,
      "grad_norm": 0.9150102734565735,
      "learning_rate": 4.945858940591162e-05,
      "loss": 0.0048,
      "step": 74
    },
    {
      "epoch": 0.021949078138718173,
      "grad_norm": 0.07271959632635117,
      "learning_rate": 4.945127304653205e-05,
      "loss": 0.0014,
      "step": 75
    },
    {
      "epoch": 0.02224173251390108,
      "grad_norm": 7.08049201965332,
      "learning_rate": 4.944395668715248e-05,
      "loss": 0.058,
      "step": 76
    },
    {
      "epoch": 0.02253438688908399,
      "grad_norm": 3.4751858711242676,
      "learning_rate": 4.943664032777291e-05,
      "loss": 0.0364,
      "step": 77
    },
    {
      "epoch": 0.0228270412642669,
      "grad_norm": 7.454194068908691,
      "learning_rate": 4.9429323968393335e-05,
      "loss": 0.0815,
      "step": 78
    },
    {
      "epoch": 0.023119695639449808,
      "grad_norm": 1.007652759552002,
      "learning_rate": 4.9422007609013756e-05,
      "loss": 0.0064,
      "step": 79
    },
    {
      "epoch": 0.023412350014632717,
      "grad_norm": 4.862194061279297,
      "learning_rate": 4.9414691249634184e-05,
      "loss": 0.0276,
      "step": 80
    },
    {
      "epoch": 0.02370500438981563,
      "grad_norm": 13.2680082321167,
      "learning_rate": 4.940737489025461e-05,
      "loss": 0.3092,
      "step": 81
    },
    {
      "epoch": 0.02399765876499854,
      "grad_norm": 14.458672523498535,
      "learning_rate": 4.940005853087504e-05,
      "loss": 0.1856,
      "step": 82
    },
    {
      "epoch": 0.024290313140181447,
      "grad_norm": 11.084071159362793,
      "learning_rate": 4.939274217149547e-05,
      "loss": 0.2517,
      "step": 83
    },
    {
      "epoch": 0.024582967515364356,
      "grad_norm": 11.786449432373047,
      "learning_rate": 4.9385425812115896e-05,
      "loss": 0.2811,
      "step": 84
    },
    {
      "epoch": 0.024875621890547265,
      "grad_norm": 3.7863101959228516,
      "learning_rate": 4.9378109452736324e-05,
      "loss": 0.1465,
      "step": 85
    },
    {
      "epoch": 0.025168276265730174,
      "grad_norm": 3.508310556411743,
      "learning_rate": 4.937079309335675e-05,
      "loss": 0.0545,
      "step": 86
    },
    {
      "epoch": 0.025460930640913083,
      "grad_norm": 4.037002086639404,
      "learning_rate": 4.936347673397717e-05,
      "loss": 0.1531,
      "step": 87
    },
    {
      "epoch": 0.02575358501609599,
      "grad_norm": 15.45694351196289,
      "learning_rate": 4.93561603745976e-05,
      "loss": 0.4562,
      "step": 88
    },
    {
      "epoch": 0.0260462393912789,
      "grad_norm": 5.105620384216309,
      "learning_rate": 4.934884401521803e-05,
      "loss": 0.0803,
      "step": 89
    },
    {
      "epoch": 0.02633889376646181,
      "grad_norm": 3.120607852935791,
      "learning_rate": 4.934152765583846e-05,
      "loss": 0.037,
      "step": 90
    },
    {
      "epoch": 0.026631548141644718,
      "grad_norm": 13.44425106048584,
      "learning_rate": 4.9334211296458885e-05,
      "loss": 0.2523,
      "step": 91
    },
    {
      "epoch": 0.026924202516827627,
      "grad_norm": 8.555777549743652,
      "learning_rate": 4.932689493707931e-05,
      "loss": 0.1941,
      "step": 92
    },
    {
      "epoch": 0.027216856892010536,
      "grad_norm": 4.331564903259277,
      "learning_rate": 4.931957857769974e-05,
      "loss": 0.0826,
      "step": 93
    },
    {
      "epoch": 0.027509511267193445,
      "grad_norm": 4.442009449005127,
      "learning_rate": 4.931226221832017e-05,
      "loss": 0.0389,
      "step": 94
    },
    {
      "epoch": 0.027802165642376354,
      "grad_norm": 6.328910827636719,
      "learning_rate": 4.930494585894059e-05,
      "loss": 0.0539,
      "step": 95
    },
    {
      "epoch": 0.028094820017559263,
      "grad_norm": 7.026902675628662,
      "learning_rate": 4.929762949956102e-05,
      "loss": 0.0754,
      "step": 96
    },
    {
      "epoch": 0.02838747439274217,
      "grad_norm": 1.1635509729385376,
      "learning_rate": 4.9290313140181446e-05,
      "loss": 0.0121,
      "step": 97
    },
    {
      "epoch": 0.02868012876792508,
      "grad_norm": 1.090627908706665,
      "learning_rate": 4.9282996780801874e-05,
      "loss": 0.0102,
      "step": 98
    },
    {
      "epoch": 0.02897278314310799,
      "grad_norm": 7.56167459487915,
      "learning_rate": 4.92756804214223e-05,
      "loss": 0.2045,
      "step": 99
    },
    {
      "epoch": 0.029265437518290898,
      "grad_norm": 4.7205281257629395,
      "learning_rate": 4.926836406204273e-05,
      "loss": 0.0799,
      "step": 100
    },
    {
      "epoch": 0.029558091893473807,
      "grad_norm": 0.6290732026100159,
      "learning_rate": 4.926104770266316e-05,
      "loss": 0.0065,
      "step": 101
    },
    {
      "epoch": 0.029850746268656716,
      "grad_norm": 8.050834655761719,
      "learning_rate": 4.9253731343283586e-05,
      "loss": 0.1108,
      "step": 102
    },
    {
      "epoch": 0.030143400643839625,
      "grad_norm": 0.0645279660820961,
      "learning_rate": 4.924641498390401e-05,
      "loss": 0.0013,
      "step": 103
    },
    {
      "epoch": 0.030436055019022534,
      "grad_norm": 0.08164330571889877,
      "learning_rate": 4.9239098624524435e-05,
      "loss": 0.0014,
      "step": 104
    },
    {
      "epoch": 0.030728709394205442,
      "grad_norm": 1.1083030700683594,
      "learning_rate": 4.923178226514486e-05,
      "loss": 0.0069,
      "step": 105
    },
    {
      "epoch": 0.03102136376938835,
      "grad_norm": 0.28905317187309265,
      "learning_rate": 4.922446590576529e-05,
      "loss": 0.0047,
      "step": 106
    },
    {
      "epoch": 0.031314018144571264,
      "grad_norm": 0.8381115198135376,
      "learning_rate": 4.921714954638572e-05,
      "loss": 0.0054,
      "step": 107
    },
    {
      "epoch": 0.03160667251975417,
      "grad_norm": 5.30599308013916,
      "learning_rate": 4.9209833187006146e-05,
      "loss": 0.0412,
      "step": 108
    },
    {
      "epoch": 0.03189932689493708,
      "grad_norm": 1.0647958517074585,
      "learning_rate": 4.9202516827626574e-05,
      "loss": 0.0059,
      "step": 109
    },
    {
      "epoch": 0.03219198127011999,
      "grad_norm": 0.035897109657526016,
      "learning_rate": 4.9195200468247e-05,
      "loss": 0.0007,
      "step": 110
    },
    {
      "epoch": 0.0324846356453029,
      "grad_norm": 0.42506933212280273,
      "learning_rate": 4.918788410886743e-05,
      "loss": 0.0018,
      "step": 111
    },
    {
      "epoch": 0.03277729002048581,
      "grad_norm": 0.009726514108479023,
      "learning_rate": 4.918056774948785e-05,
      "loss": 0.0001,
      "step": 112
    },
    {
      "epoch": 0.03306994439566872,
      "grad_norm": 0.08547463268041611,
      "learning_rate": 4.917325139010828e-05,
      "loss": 0.0008,
      "step": 113
    },
    {
      "epoch": 0.033362598770851626,
      "grad_norm": 0.010455029085278511,
      "learning_rate": 4.916593503072871e-05,
      "loss": 0.0002,
      "step": 114
    },
    {
      "epoch": 0.033655253146034535,
      "grad_norm": 0.019560284912586212,
      "learning_rate": 4.9158618671349135e-05,
      "loss": 0.0003,
      "step": 115
    },
    {
      "epoch": 0.033947907521217444,
      "grad_norm": 0.008105482906103134,
      "learning_rate": 4.915130231196956e-05,
      "loss": 0.0001,
      "step": 116
    },
    {
      "epoch": 0.03424056189640035,
      "grad_norm": 0.03059670701622963,
      "learning_rate": 4.914398595258999e-05,
      "loss": 0.0003,
      "step": 117
    },
    {
      "epoch": 0.03453321627158326,
      "grad_norm": 10.641276359558105,
      "learning_rate": 4.913666959321042e-05,
      "loss": 0.27,
      "step": 118
    },
    {
      "epoch": 0.03482587064676617,
      "grad_norm": 6.722626686096191,
      "learning_rate": 4.912935323383085e-05,
      "loss": 0.0246,
      "step": 119
    },
    {
      "epoch": 0.03511852502194908,
      "grad_norm": 0.016005797311663628,
      "learning_rate": 4.9122036874451275e-05,
      "loss": 0.0002,
      "step": 120
    },
    {
      "epoch": 0.03541117939713199,
      "grad_norm": 3.1229820251464844,
      "learning_rate": 4.91147205150717e-05,
      "loss": 0.0201,
      "step": 121
    },
    {
      "epoch": 0.0357038337723149,
      "grad_norm": 0.020100802183151245,
      "learning_rate": 4.910740415569213e-05,
      "loss": 0.0002,
      "step": 122
    },
    {
      "epoch": 0.035996488147497806,
      "grad_norm": 0.1265154480934143,
      "learning_rate": 4.910008779631255e-05,
      "loss": 0.0008,
      "step": 123
    },
    {
      "epoch": 0.036289142522680715,
      "grad_norm": 0.003131201257929206,
      "learning_rate": 4.909277143693298e-05,
      "loss": 0.0001,
      "step": 124
    },
    {
      "epoch": 0.036581796897863623,
      "grad_norm": 14.657594680786133,
      "learning_rate": 4.908545507755341e-05,
      "loss": 0.0794,
      "step": 125
    },
    {
      "epoch": 0.03687445127304653,
      "grad_norm": 13.732138633728027,
      "learning_rate": 4.9078138718173836e-05,
      "loss": 0.1962,
      "step": 126
    },
    {
      "epoch": 0.03716710564822944,
      "grad_norm": 0.01592232845723629,
      "learning_rate": 4.9070822358794264e-05,
      "loss": 0.0003,
      "step": 127
    },
    {
      "epoch": 0.03745976002341235,
      "grad_norm": 9.321319580078125,
      "learning_rate": 4.906350599941469e-05,
      "loss": 0.079,
      "step": 128
    },
    {
      "epoch": 0.03775241439859526,
      "grad_norm": 0.34992310404777527,
      "learning_rate": 4.905618964003512e-05,
      "loss": 0.0014,
      "step": 129
    },
    {
      "epoch": 0.03804506877377817,
      "grad_norm": 11.185626029968262,
      "learning_rate": 4.904887328065555e-05,
      "loss": 0.1738,
      "step": 130
    },
    {
      "epoch": 0.03833772314896108,
      "grad_norm": 0.4017196297645569,
      "learning_rate": 4.9041556921275976e-05,
      "loss": 0.0015,
      "step": 131
    },
    {
      "epoch": 0.038630377524143986,
      "grad_norm": 9.26835823059082,
      "learning_rate": 4.9034240561896404e-05,
      "loss": 0.1436,
      "step": 132
    },
    {
      "epoch": 0.038923031899326895,
      "grad_norm": 0.014303785748779774,
      "learning_rate": 4.9026924202516825e-05,
      "loss": 0.0002,
      "step": 133
    },
    {
      "epoch": 0.0392156862745098,
      "grad_norm": 15.018467903137207,
      "learning_rate": 4.901960784313725e-05,
      "loss": 0.2495,
      "step": 134
    },
    {
      "epoch": 0.03950834064969271,
      "grad_norm": 17.677291870117188,
      "learning_rate": 4.901229148375768e-05,
      "loss": 0.5083,
      "step": 135
    },
    {
      "epoch": 0.03980099502487562,
      "grad_norm": 26.62687110900879,
      "learning_rate": 4.900497512437811e-05,
      "loss": 0.9146,
      "step": 136
    },
    {
      "epoch": 0.04009364940005853,
      "grad_norm": 1.2822232246398926,
      "learning_rate": 4.899765876499854e-05,
      "loss": 0.0047,
      "step": 137
    },
    {
      "epoch": 0.04038630377524144,
      "grad_norm": 0.004942530766129494,
      "learning_rate": 4.8990342405618965e-05,
      "loss": 0.0001,
      "step": 138
    },
    {
      "epoch": 0.04067895815042435,
      "grad_norm": 0.04992402717471123,
      "learning_rate": 4.898302604623939e-05,
      "loss": 0.0007,
      "step": 139
    },
    {
      "epoch": 0.04097161252560726,
      "grad_norm": 7.602818965911865,
      "learning_rate": 4.897570968685982e-05,
      "loss": 0.1186,
      "step": 140
    },
    {
      "epoch": 0.041264266900790166,
      "grad_norm": 5.089090347290039,
      "learning_rate": 4.896839332748025e-05,
      "loss": 0.0466,
      "step": 141
    },
    {
      "epoch": 0.041556921275973074,
      "grad_norm": 8.595590591430664,
      "learning_rate": 4.8961076968100676e-05,
      "loss": 0.1921,
      "step": 142
    },
    {
      "epoch": 0.04184957565115598,
      "grad_norm": 0.10725227743387222,
      "learning_rate": 4.8953760608721104e-05,
      "loss": 0.0013,
      "step": 143
    },
    {
      "epoch": 0.04214223002633889,
      "grad_norm": 0.0959903746843338,
      "learning_rate": 4.8946444249341526e-05,
      "loss": 0.0014,
      "step": 144
    },
    {
      "epoch": 0.0424348844015218,
      "grad_norm": 3.4017302989959717,
      "learning_rate": 4.8939127889961954e-05,
      "loss": 0.0167,
      "step": 145
    },
    {
      "epoch": 0.04272753877670471,
      "grad_norm": 1.009041666984558,
      "learning_rate": 4.893181153058238e-05,
      "loss": 0.0094,
      "step": 146
    },
    {
      "epoch": 0.04302019315188762,
      "grad_norm": 18.60599136352539,
      "learning_rate": 4.892449517120281e-05,
      "loss": 0.5468,
      "step": 147
    },
    {
      "epoch": 0.04331284752707053,
      "grad_norm": 8.110335350036621,
      "learning_rate": 4.891717881182324e-05,
      "loss": 0.3326,
      "step": 148
    },
    {
      "epoch": 0.043605501902253437,
      "grad_norm": 11.047513008117676,
      "learning_rate": 4.8909862452443665e-05,
      "loss": 0.1789,
      "step": 149
    },
    {
      "epoch": 0.043898156277436345,
      "grad_norm": 2.181462049484253,
      "learning_rate": 4.890254609306409e-05,
      "loss": 0.0298,
      "step": 150
    },
    {
      "epoch": 0.044190810652619254,
      "grad_norm": 0.3789248466491699,
      "learning_rate": 4.889522973368452e-05,
      "loss": 0.0063,
      "step": 151
    },
    {
      "epoch": 0.04448346502780216,
      "grad_norm": 3.644010305404663,
      "learning_rate": 4.888791337430495e-05,
      "loss": 0.0446,
      "step": 152
    },
    {
      "epoch": 0.04477611940298507,
      "grad_norm": 1.0178574323654175,
      "learning_rate": 4.888059701492538e-05,
      "loss": 0.012,
      "step": 153
    },
    {
      "epoch": 0.04506877377816798,
      "grad_norm": 1.6284925937652588,
      "learning_rate": 4.88732806555458e-05,
      "loss": 0.0119,
      "step": 154
    },
    {
      "epoch": 0.04536142815335089,
      "grad_norm": 0.4712274968624115,
      "learning_rate": 4.8865964296166226e-05,
      "loss": 0.0052,
      "step": 155
    },
    {
      "epoch": 0.0456540825285338,
      "grad_norm": 4.255475044250488,
      "learning_rate": 4.8858647936786654e-05,
      "loss": 0.0983,
      "step": 156
    },
    {
      "epoch": 0.04594673690371671,
      "grad_norm": 2.3100857734680176,
      "learning_rate": 4.885133157740708e-05,
      "loss": 0.0124,
      "step": 157
    },
    {
      "epoch": 0.046239391278899616,
      "grad_norm": 10.108885765075684,
      "learning_rate": 4.884401521802751e-05,
      "loss": 0.1775,
      "step": 158
    },
    {
      "epoch": 0.046532045654082525,
      "grad_norm": 0.29219287633895874,
      "learning_rate": 4.883669885864794e-05,
      "loss": 0.0025,
      "step": 159
    },
    {
      "epoch": 0.046824700029265434,
      "grad_norm": 8.007964134216309,
      "learning_rate": 4.8829382499268366e-05,
      "loss": 0.17,
      "step": 160
    },
    {
      "epoch": 0.04711735440444834,
      "grad_norm": 3.072378635406494,
      "learning_rate": 4.8822066139888794e-05,
      "loss": 0.0224,
      "step": 161
    },
    {
      "epoch": 0.04741000877963126,
      "grad_norm": 0.2821565270423889,
      "learning_rate": 4.881474978050922e-05,
      "loss": 0.0024,
      "step": 162
    },
    {
      "epoch": 0.04770266315481417,
      "grad_norm": 0.0806671753525734,
      "learning_rate": 4.880743342112965e-05,
      "loss": 0.0013,
      "step": 163
    },
    {
      "epoch": 0.04799531752999708,
      "grad_norm": 3.4239866733551025,
      "learning_rate": 4.880011706175008e-05,
      "loss": 0.1401,
      "step": 164
    },
    {
      "epoch": 0.048287971905179985,
      "grad_norm": 8.500259399414062,
      "learning_rate": 4.87928007023705e-05,
      "loss": 0.0536,
      "step": 165
    },
    {
      "epoch": 0.048580626280362894,
      "grad_norm": 11.746143341064453,
      "learning_rate": 4.878548434299093e-05,
      "loss": 0.2361,
      "step": 166
    },
    {
      "epoch": 0.0488732806555458,
      "grad_norm": 5.371679306030273,
      "learning_rate": 4.8778167983611355e-05,
      "loss": 0.0422,
      "step": 167
    },
    {
      "epoch": 0.04916593503072871,
      "grad_norm": 3.1472370624542236,
      "learning_rate": 4.877085162423178e-05,
      "loss": 0.0248,
      "step": 168
    },
    {
      "epoch": 0.04945858940591162,
      "grad_norm": 1.7468245029449463,
      "learning_rate": 4.876353526485221e-05,
      "loss": 0.0133,
      "step": 169
    },
    {
      "epoch": 0.04975124378109453,
      "grad_norm": 0.6838405132293701,
      "learning_rate": 4.875621890547264e-05,
      "loss": 0.0065,
      "step": 170
    },
    {
      "epoch": 0.05004389815627744,
      "grad_norm": 0.05950434133410454,
      "learning_rate": 4.874890254609307e-05,
      "loss": 0.0011,
      "step": 171
    },
    {
      "epoch": 0.05033655253146035,
      "grad_norm": 0.057430557906627655,
      "learning_rate": 4.8741586186713495e-05,
      "loss": 0.0007,
      "step": 172
    },
    {
      "epoch": 0.050629206906643257,
      "grad_norm": 0.7738732695579529,
      "learning_rate": 4.873426982733392e-05,
      "loss": 0.008,
      "step": 173
    },
    {
      "epoch": 0.050921861281826165,
      "grad_norm": 0.2586875855922699,
      "learning_rate": 4.872695346795435e-05,
      "loss": 0.0016,
      "step": 174
    },
    {
      "epoch": 0.051214515657009074,
      "grad_norm": 0.33069998025894165,
      "learning_rate": 4.871963710857478e-05,
      "loss": 0.0025,
      "step": 175
    },
    {
      "epoch": 0.05150717003219198,
      "grad_norm": 0.6992021799087524,
      "learning_rate": 4.87123207491952e-05,
      "loss": 0.0044,
      "step": 176
    },
    {
      "epoch": 0.05179982440737489,
      "grad_norm": 4.2504987716674805,
      "learning_rate": 4.870500438981563e-05,
      "loss": 0.1317,
      "step": 177
    },
    {
      "epoch": 0.0520924787825578,
      "grad_norm": 5.421692848205566,
      "learning_rate": 4.8697688030436056e-05,
      "loss": 0.0209,
      "step": 178
    },
    {
      "epoch": 0.05238513315774071,
      "grad_norm": 0.11863663792610168,
      "learning_rate": 4.8690371671056484e-05,
      "loss": 0.0016,
      "step": 179
    },
    {
      "epoch": 0.05267778753292362,
      "grad_norm": 11.441097259521484,
      "learning_rate": 4.868305531167691e-05,
      "loss": 0.0851,
      "step": 180
    },
    {
      "epoch": 0.05297044190810653,
      "grad_norm": 0.1788787990808487,
      "learning_rate": 4.867573895229734e-05,
      "loss": 0.0018,
      "step": 181
    },
    {
      "epoch": 0.053263096283289436,
      "grad_norm": 3.151461362838745,
      "learning_rate": 4.866842259291777e-05,
      "loss": 0.0311,
      "step": 182
    },
    {
      "epoch": 0.053555750658472345,
      "grad_norm": 0.7903239130973816,
      "learning_rate": 4.8661106233538195e-05,
      "loss": 0.0074,
      "step": 183
    },
    {
      "epoch": 0.053848405033655254,
      "grad_norm": 0.18016786873340607,
      "learning_rate": 4.865378987415862e-05,
      "loss": 0.0021,
      "step": 184
    },
    {
      "epoch": 0.05414105940883816,
      "grad_norm": 0.02187931537628174,
      "learning_rate": 4.864647351477905e-05,
      "loss": 0.0003,
      "step": 185
    },
    {
      "epoch": 0.05443371378402107,
      "grad_norm": 0.025294115766882896,
      "learning_rate": 4.863915715539947e-05,
      "loss": 0.0004,
      "step": 186
    },
    {
      "epoch": 0.05472636815920398,
      "grad_norm": 0.031194772571325302,
      "learning_rate": 4.86318407960199e-05,
      "loss": 0.0002,
      "step": 187
    },
    {
      "epoch": 0.05501902253438689,
      "grad_norm": 1.7829680442810059,
      "learning_rate": 4.862452443664033e-05,
      "loss": 0.0045,
      "step": 188
    },
    {
      "epoch": 0.0553116769095698,
      "grad_norm": 0.01780118979513645,
      "learning_rate": 4.8617208077260756e-05,
      "loss": 0.0002,
      "step": 189
    },
    {
      "epoch": 0.05560433128475271,
      "grad_norm": 0.2539866864681244,
      "learning_rate": 4.8609891717881184e-05,
      "loss": 0.0012,
      "step": 190
    },
    {
      "epoch": 0.055896985659935616,
      "grad_norm": 0.10223159939050674,
      "learning_rate": 4.860257535850161e-05,
      "loss": 0.0004,
      "step": 191
    },
    {
      "epoch": 0.056189640035118525,
      "grad_norm": 0.012053336016833782,
      "learning_rate": 4.859525899912204e-05,
      "loss": 0.0001,
      "step": 192
    },
    {
      "epoch": 0.056482294410301434,
      "grad_norm": 1.3330971002578735,
      "learning_rate": 4.858794263974247e-05,
      "loss": 0.0059,
      "step": 193
    },
    {
      "epoch": 0.05677494878548434,
      "grad_norm": 0.20015238225460052,
      "learning_rate": 4.8580626280362896e-05,
      "loss": 0.0008,
      "step": 194
    },
    {
      "epoch": 0.05706760316066725,
      "grad_norm": 0.0025256345979869366,
      "learning_rate": 4.8573309920983324e-05,
      "loss": 0.0001,
      "step": 195
    },
    {
      "epoch": 0.05736025753585016,
      "grad_norm": 0.008162226527929306,
      "learning_rate": 4.856599356160375e-05,
      "loss": 0.0001,
      "step": 196
    },
    {
      "epoch": 0.05765291191103307,
      "grad_norm": 0.002979808719828725,
      "learning_rate": 4.855867720222417e-05,
      "loss": 0.0,
      "step": 197
    },
    {
      "epoch": 0.05794556628621598,
      "grad_norm": 11.365145683288574,
      "learning_rate": 4.85513608428446e-05,
      "loss": 0.0807,
      "step": 198
    },
    {
      "epoch": 0.05823822066139889,
      "grad_norm": 3.422775983810425,
      "learning_rate": 4.854404448346503e-05,
      "loss": 0.0053,
      "step": 199
    },
    {
      "epoch": 0.058530875036581796,
      "grad_norm": 0.18176983296871185,
      "learning_rate": 4.853672812408546e-05,
      "loss": 0.0003,
      "step": 200
    },
    {
      "epoch": 0.058823529411764705,
      "grad_norm": 0.019310927018523216,
      "learning_rate": 4.8529411764705885e-05,
      "loss": 0.0001,
      "step": 201
    },
    {
      "epoch": 0.059116183786947614,
      "grad_norm": 0.0014688526280224323,
      "learning_rate": 4.852209540532631e-05,
      "loss": 0.0,
      "step": 202
    },
    {
      "epoch": 0.05940883816213052,
      "grad_norm": 11.9734468460083,
      "learning_rate": 4.851477904594674e-05,
      "loss": 0.0195,
      "step": 203
    },
    {
      "epoch": 0.05970149253731343,
      "grad_norm": 0.0035654143430292606,
      "learning_rate": 4.850746268656717e-05,
      "loss": 0.0001,
      "step": 204
    },
    {
      "epoch": 0.05999414691249634,
      "grad_norm": 0.23448404669761658,
      "learning_rate": 4.85001463271876e-05,
      "loss": 0.0011,
      "step": 205
    },
    {
      "epoch": 0.06028680128767925,
      "grad_norm": 0.01675930805504322,
      "learning_rate": 4.8492829967808025e-05,
      "loss": 0.0001,
      "step": 206
    },
    {
      "epoch": 0.06057945566286216,
      "grad_norm": 0.026205426082015038,
      "learning_rate": 4.8485513608428446e-05,
      "loss": 0.0001,
      "step": 207
    },
    {
      "epoch": 0.06087211003804507,
      "grad_norm": 0.7105171084403992,
      "learning_rate": 4.8478197249048874e-05,
      "loss": 0.0013,
      "step": 208
    },
    {
      "epoch": 0.061164764413227976,
      "grad_norm": 28.973526000976562,
      "learning_rate": 4.84708808896693e-05,
      "loss": 0.0618,
      "step": 209
    },
    {
      "epoch": 0.061457418788410885,
      "grad_norm": 8.843789100646973,
      "learning_rate": 4.846356453028973e-05,
      "loss": 0.13,
      "step": 210
    },
    {
      "epoch": 0.061750073163593794,
      "grad_norm": 0.002778812311589718,
      "learning_rate": 4.845624817091016e-05,
      "loss": 0.0,
      "step": 211
    },
    {
      "epoch": 0.0620427275387767,
      "grad_norm": 0.0018273483728989959,
      "learning_rate": 4.8448931811530586e-05,
      "loss": 0.0,
      "step": 212
    },
    {
      "epoch": 0.06233538191395961,
      "grad_norm": 0.0318334624171257,
      "learning_rate": 4.8441615452151014e-05,
      "loss": 0.0002,
      "step": 213
    },
    {
      "epoch": 0.06262803628914253,
      "grad_norm": 0.043673787266016006,
      "learning_rate": 4.843429909277144e-05,
      "loss": 0.0002,
      "step": 214
    },
    {
      "epoch": 0.06292069066432543,
      "grad_norm": 0.023164449259638786,
      "learning_rate": 4.842698273339187e-05,
      "loss": 0.0002,
      "step": 215
    },
    {
      "epoch": 0.06321334503950835,
      "grad_norm": 0.02091466635465622,
      "learning_rate": 4.84196663740123e-05,
      "loss": 0.0001,
      "step": 216
    },
    {
      "epoch": 0.06350599941469125,
      "grad_norm": 3.7927169799804688,
      "learning_rate": 4.8412350014632725e-05,
      "loss": 0.0116,
      "step": 217
    },
    {
      "epoch": 0.06379865378987416,
      "grad_norm": 0.012715407647192478,
      "learning_rate": 4.8405033655253146e-05,
      "loss": 0.0001,
      "step": 218
    },
    {
      "epoch": 0.06409130816505706,
      "grad_norm": 0.008732621558010578,
      "learning_rate": 4.8397717295873574e-05,
      "loss": 0.0001,
      "step": 219
    },
    {
      "epoch": 0.06438396254023998,
      "grad_norm": 13.74286937713623,
      "learning_rate": 4.8390400936494e-05,
      "loss": 0.1818,
      "step": 220
    },
    {
      "epoch": 0.06467661691542288,
      "grad_norm": 0.00698635121807456,
      "learning_rate": 4.838308457711443e-05,
      "loss": 0.0,
      "step": 221
    },
    {
      "epoch": 0.0649692712906058,
      "grad_norm": 13.672441482543945,
      "learning_rate": 4.837576821773486e-05,
      "loss": 0.0313,
      "step": 222
    },
    {
      "epoch": 0.0652619256657887,
      "grad_norm": 0.004722634330391884,
      "learning_rate": 4.8368451858355286e-05,
      "loss": 0.0,
      "step": 223
    },
    {
      "epoch": 0.06555458004097162,
      "grad_norm": 0.017278265208005905,
      "learning_rate": 4.8361135498975714e-05,
      "loss": 0.0001,
      "step": 224
    },
    {
      "epoch": 0.06584723441615452,
      "grad_norm": 0.5335111021995544,
      "learning_rate": 4.835381913959614e-05,
      "loss": 0.002,
      "step": 225
    },
    {
      "epoch": 0.06613988879133743,
      "grad_norm": 0.0020765329245477915,
      "learning_rate": 4.834650278021657e-05,
      "loss": 0.0,
      "step": 226
    },
    {
      "epoch": 0.06643254316652034,
      "grad_norm": 0.004643509164452553,
      "learning_rate": 4.8339186420837e-05,
      "loss": 0.0,
      "step": 227
    },
    {
      "epoch": 0.06672519754170325,
      "grad_norm": 5.808701992034912,
      "learning_rate": 4.8331870061457426e-05,
      "loss": 0.2854,
      "step": 228
    },
    {
      "epoch": 0.06701785191688615,
      "grad_norm": 0.0037125300150364637,
      "learning_rate": 4.832455370207785e-05,
      "loss": 0.0,
      "step": 229
    },
    {
      "epoch": 0.06731050629206907,
      "grad_norm": 0.017818965017795563,
      "learning_rate": 4.8317237342698275e-05,
      "loss": 0.0002,
      "step": 230
    },
    {
      "epoch": 0.06760316066725197,
      "grad_norm": 0.001532245078124106,
      "learning_rate": 4.83099209833187e-05,
      "loss": 0.0,
      "step": 231
    },
    {
      "epoch": 0.06789581504243489,
      "grad_norm": 0.17457996308803558,
      "learning_rate": 4.830260462393913e-05,
      "loss": 0.0007,
      "step": 232
    },
    {
      "epoch": 0.06818846941761779,
      "grad_norm": 8.137628555297852,
      "learning_rate": 4.829528826455956e-05,
      "loss": 0.2625,
      "step": 233
    },
    {
      "epoch": 0.0684811237928007,
      "grad_norm": 0.002676829230040312,
      "learning_rate": 4.828797190517999e-05,
      "loss": 0.0,
      "step": 234
    },
    {
      "epoch": 0.0687737781679836,
      "grad_norm": 0.7786100506782532,
      "learning_rate": 4.8280655545800415e-05,
      "loss": 0.0022,
      "step": 235
    },
    {
      "epoch": 0.06906643254316652,
      "grad_norm": 0.0047471970319747925,
      "learning_rate": 4.827333918642084e-05,
      "loss": 0.0,
      "step": 236
    },
    {
      "epoch": 0.06935908691834942,
      "grad_norm": 0.0006819628761149943,
      "learning_rate": 4.826602282704127e-05,
      "loss": 0.0,
      "step": 237
    },
    {
      "epoch": 0.06965174129353234,
      "grad_norm": 18.804973602294922,
      "learning_rate": 4.82587064676617e-05,
      "loss": 0.2542,
      "step": 238
    },
    {
      "epoch": 0.06994439566871524,
      "grad_norm": 0.018086520954966545,
      "learning_rate": 4.825139010828212e-05,
      "loss": 0.0001,
      "step": 239
    },
    {
      "epoch": 0.07023705004389816,
      "grad_norm": 0.01671360246837139,
      "learning_rate": 4.824407374890255e-05,
      "loss": 0.0001,
      "step": 240
    },
    {
      "epoch": 0.07052970441908106,
      "grad_norm": 11.613852500915527,
      "learning_rate": 4.8236757389522976e-05,
      "loss": 0.238,
      "step": 241
    },
    {
      "epoch": 0.07082235879426398,
      "grad_norm": 0.00957377627491951,
      "learning_rate": 4.8229441030143404e-05,
      "loss": 0.0001,
      "step": 242
    },
    {
      "epoch": 0.07111501316944688,
      "grad_norm": 1.9750438928604126,
      "learning_rate": 4.822212467076383e-05,
      "loss": 0.0119,
      "step": 243
    },
    {
      "epoch": 0.0714076675446298,
      "grad_norm": 0.006536509841680527,
      "learning_rate": 4.821480831138426e-05,
      "loss": 0.0001,
      "step": 244
    },
    {
      "epoch": 0.0717003219198127,
      "grad_norm": 0.0057691894471645355,
      "learning_rate": 4.820749195200469e-05,
      "loss": 0.0001,
      "step": 245
    },
    {
      "epoch": 0.07199297629499561,
      "grad_norm": 0.23802554607391357,
      "learning_rate": 4.8200175592625116e-05,
      "loss": 0.001,
      "step": 246
    },
    {
      "epoch": 0.07228563067017851,
      "grad_norm": 0.005101884715259075,
      "learning_rate": 4.8192859233245543e-05,
      "loss": 0.0001,
      "step": 247
    },
    {
      "epoch": 0.07257828504536143,
      "grad_norm": 0.0271473191678524,
      "learning_rate": 4.818554287386597e-05,
      "loss": 0.0004,
      "step": 248
    },
    {
      "epoch": 0.07287093942054433,
      "grad_norm": 0.3413756191730499,
      "learning_rate": 4.81782265144864e-05,
      "loss": 0.0024,
      "step": 249
    },
    {
      "epoch": 0.07316359379572725,
      "grad_norm": 5.48157262802124,
      "learning_rate": 4.817091015510682e-05,
      "loss": 0.0218,
      "step": 250
    },
    {
      "epoch": 0.07345624817091015,
      "grad_norm": 2.417241096496582,
      "learning_rate": 4.816359379572725e-05,
      "loss": 0.0151,
      "step": 251
    },
    {
      "epoch": 0.07374890254609306,
      "grad_norm": 3.9786200523376465,
      "learning_rate": 4.8156277436347676e-05,
      "loss": 0.0151,
      "step": 252
    },
    {
      "epoch": 0.07404155692127597,
      "grad_norm": 7.795351028442383,
      "learning_rate": 4.8148961076968104e-05,
      "loss": 0.1096,
      "step": 253
    },
    {
      "epoch": 0.07433421129645888,
      "grad_norm": 0.010204765945672989,
      "learning_rate": 4.814164471758853e-05,
      "loss": 0.0002,
      "step": 254
    },
    {
      "epoch": 0.07462686567164178,
      "grad_norm": 5.5745391845703125,
      "learning_rate": 4.813432835820896e-05,
      "loss": 0.0285,
      "step": 255
    },
    {
      "epoch": 0.0749195200468247,
      "grad_norm": 0.013587488792836666,
      "learning_rate": 4.812701199882939e-05,
      "loss": 0.0002,
      "step": 256
    },
    {
      "epoch": 0.0752121744220076,
      "grad_norm": 0.001782214269042015,
      "learning_rate": 4.8119695639449816e-05,
      "loss": 0.0,
      "step": 257
    },
    {
      "epoch": 0.07550482879719052,
      "grad_norm": 0.02017296850681305,
      "learning_rate": 4.8112379280070244e-05,
      "loss": 0.0002,
      "step": 258
    },
    {
      "epoch": 0.07579748317237343,
      "grad_norm": 12.804062843322754,
      "learning_rate": 4.810506292069067e-05,
      "loss": 0.185,
      "step": 259
    },
    {
      "epoch": 0.07609013754755634,
      "grad_norm": 0.011184900999069214,
      "learning_rate": 4.809774656131109e-05,
      "loss": 0.0002,
      "step": 260
    },
    {
      "epoch": 0.07638279192273925,
      "grad_norm": 0.5110180377960205,
      "learning_rate": 4.809043020193152e-05,
      "loss": 0.0034,
      "step": 261
    },
    {
      "epoch": 0.07667544629792215,
      "grad_norm": 0.5023605227470398,
      "learning_rate": 4.808311384255195e-05,
      "loss": 0.0035,
      "step": 262
    },
    {
      "epoch": 0.07696810067310507,
      "grad_norm": 1.2688344717025757,
      "learning_rate": 4.807579748317238e-05,
      "loss": 0.0092,
      "step": 263
    },
    {
      "epoch": 0.07726075504828797,
      "grad_norm": 0.09958023577928543,
      "learning_rate": 4.8068481123792805e-05,
      "loss": 0.0004,
      "step": 264
    },
    {
      "epoch": 0.07755340942347089,
      "grad_norm": 0.02350887842476368,
      "learning_rate": 4.806116476441323e-05,
      "loss": 0.0002,
      "step": 265
    },
    {
      "epoch": 0.07784606379865379,
      "grad_norm": 9.163583755493164,
      "learning_rate": 4.805384840503366e-05,
      "loss": 0.0357,
      "step": 266
    },
    {
      "epoch": 0.0781387181738367,
      "grad_norm": 0.0023382732179015875,
      "learning_rate": 4.804653204565409e-05,
      "loss": 0.0,
      "step": 267
    },
    {
      "epoch": 0.0784313725490196,
      "grad_norm": 12.967851638793945,
      "learning_rate": 4.803921568627452e-05,
      "loss": 0.1247,
      "step": 268
    },
    {
      "epoch": 0.07872402692420252,
      "grad_norm": 0.0019798458088189363,
      "learning_rate": 4.803189932689494e-05,
      "loss": 0.0,
      "step": 269
    },
    {
      "epoch": 0.07901668129938542,
      "grad_norm": 0.06770626455545425,
      "learning_rate": 4.8024582967515366e-05,
      "loss": 0.0004,
      "step": 270
    },
    {
      "epoch": 0.07930933567456834,
      "grad_norm": 0.08193594962358475,
      "learning_rate": 4.8017266608135794e-05,
      "loss": 0.0005,
      "step": 271
    },
    {
      "epoch": 0.07960199004975124,
      "grad_norm": 15.774731636047363,
      "learning_rate": 4.800995024875622e-05,
      "loss": 0.1289,
      "step": 272
    },
    {
      "epoch": 0.07989464442493416,
      "grad_norm": 3.4991703033447266,
      "learning_rate": 4.800263388937665e-05,
      "loss": 0.0112,
      "step": 273
    },
    {
      "epoch": 0.08018729880011706,
      "grad_norm": 0.045116156339645386,
      "learning_rate": 4.799531752999708e-05,
      "loss": 0.0004,
      "step": 274
    },
    {
      "epoch": 0.08047995317529998,
      "grad_norm": 0.13101589679718018,
      "learning_rate": 4.7988001170617506e-05,
      "loss": 0.0006,
      "step": 275
    },
    {
      "epoch": 0.08077260755048288,
      "grad_norm": 0.08923770487308502,
      "learning_rate": 4.7980684811237934e-05,
      "loss": 0.0006,
      "step": 276
    },
    {
      "epoch": 0.0810652619256658,
      "grad_norm": 0.7768778204917908,
      "learning_rate": 4.7973368451858355e-05,
      "loss": 0.0019,
      "step": 277
    },
    {
      "epoch": 0.0813579163008487,
      "grad_norm": 0.010617797262966633,
      "learning_rate": 4.796605209247878e-05,
      "loss": 0.0001,
      "step": 278
    },
    {
      "epoch": 0.08165057067603161,
      "grad_norm": 1.319770336151123,
      "learning_rate": 4.795873573309921e-05,
      "loss": 0.0037,
      "step": 279
    },
    {
      "epoch": 0.08194322505121451,
      "grad_norm": 1.0644629001617432,
      "learning_rate": 4.795141937371964e-05,
      "loss": 0.002,
      "step": 280
    },
    {
      "epoch": 0.08223587942639743,
      "grad_norm": 5.610535144805908,
      "learning_rate": 4.794410301434007e-05,
      "loss": 0.0108,
      "step": 281
    },
    {
      "epoch": 0.08252853380158033,
      "grad_norm": 0.0032586927991360426,
      "learning_rate": 4.7936786654960495e-05,
      "loss": 0.0,
      "step": 282
    },
    {
      "epoch": 0.08282118817676325,
      "grad_norm": 0.0010933985468000174,
      "learning_rate": 4.792947029558092e-05,
      "loss": 0.0,
      "step": 283
    },
    {
      "epoch": 0.08311384255194615,
      "grad_norm": 0.0033725625835359097,
      "learning_rate": 4.792215393620135e-05,
      "loss": 0.0001,
      "step": 284
    },
    {
      "epoch": 0.08340649692712906,
      "grad_norm": 0.05476341396570206,
      "learning_rate": 4.791483757682177e-05,
      "loss": 0.0002,
      "step": 285
    },
    {
      "epoch": 0.08369915130231197,
      "grad_norm": 0.0005282636848278344,
      "learning_rate": 4.79075212174422e-05,
      "loss": 0.0,
      "step": 286
    },
    {
      "epoch": 0.08399180567749488,
      "grad_norm": 2.6202609539031982,
      "learning_rate": 4.790020485806263e-05,
      "loss": 0.3312,
      "step": 287
    },
    {
      "epoch": 0.08428446005267778,
      "grad_norm": 0.24636919796466827,
      "learning_rate": 4.7892888498683056e-05,
      "loss": 0.0005,
      "step": 288
    },
    {
      "epoch": 0.0845771144278607,
      "grad_norm": 12.588053703308105,
      "learning_rate": 4.7885572139303484e-05,
      "loss": 0.0787,
      "step": 289
    },
    {
      "epoch": 0.0848697688030436,
      "grad_norm": 2.8286802768707275,
      "learning_rate": 4.787825577992391e-05,
      "loss": 0.0063,
      "step": 290
    },
    {
      "epoch": 0.08516242317822652,
      "grad_norm": 0.04766281321644783,
      "learning_rate": 4.787093942054434e-05,
      "loss": 0.0004,
      "step": 291
    },
    {
      "epoch": 0.08545507755340942,
      "grad_norm": 0.8390278816223145,
      "learning_rate": 4.786362306116477e-05,
      "loss": 0.004,
      "step": 292
    },
    {
      "epoch": 0.08574773192859234,
      "grad_norm": 7.186341762542725,
      "learning_rate": 4.785630670178519e-05,
      "loss": 0.0437,
      "step": 293
    },
    {
      "epoch": 0.08604038630377524,
      "grad_norm": 9.822579383850098,
      "learning_rate": 4.7848990342405616e-05,
      "loss": 0.0769,
      "step": 294
    },
    {
      "epoch": 0.08633304067895815,
      "grad_norm": 0.16112007200717926,
      "learning_rate": 4.7841673983026044e-05,
      "loss": 0.0017,
      "step": 295
    },
    {
      "epoch": 0.08662569505414106,
      "grad_norm": 0.14990444481372833,
      "learning_rate": 4.783435762364647e-05,
      "loss": 0.0012,
      "step": 296
    },
    {
      "epoch": 0.08691834942932397,
      "grad_norm": 0.037165552377700806,
      "learning_rate": 4.78270412642669e-05,
      "loss": 0.0006,
      "step": 297
    },
    {
      "epoch": 0.08721100380450687,
      "grad_norm": 0.586540937423706,
      "learning_rate": 4.781972490488733e-05,
      "loss": 0.0012,
      "step": 298
    },
    {
      "epoch": 0.08750365817968979,
      "grad_norm": 0.014206857420504093,
      "learning_rate": 4.7812408545507756e-05,
      "loss": 0.0003,
      "step": 299
    },
    {
      "epoch": 0.08779631255487269,
      "grad_norm": 1.0259332656860352,
      "learning_rate": 4.7805092186128184e-05,
      "loss": 0.0019,
      "step": 300
    },
    {
      "epoch": 0.0880889669300556,
      "grad_norm": 0.010009337216615677,
      "learning_rate": 4.779777582674861e-05,
      "loss": 0.0002,
      "step": 301
    },
    {
      "epoch": 0.08838162130523851,
      "grad_norm": 0.0075365579687058926,
      "learning_rate": 4.779045946736904e-05,
      "loss": 0.0002,
      "step": 302
    },
    {
      "epoch": 0.08867427568042142,
      "grad_norm": 12.360414505004883,
      "learning_rate": 4.778314310798946e-05,
      "loss": 0.1413,
      "step": 303
    },
    {
      "epoch": 0.08896693005560433,
      "grad_norm": 0.008117208257317543,
      "learning_rate": 4.777582674860989e-05,
      "loss": 0.0002,
      "step": 304
    },
    {
      "epoch": 0.08925958443078724,
      "grad_norm": 0.022172026336193085,
      "learning_rate": 4.776851038923032e-05,
      "loss": 0.0003,
      "step": 305
    },
    {
      "epoch": 0.08955223880597014,
      "grad_norm": 0.008608819916844368,
      "learning_rate": 4.7761194029850745e-05,
      "loss": 0.0002,
      "step": 306
    },
    {
      "epoch": 0.08984489318115306,
      "grad_norm": 0.021730391308665276,
      "learning_rate": 4.775387767047117e-05,
      "loss": 0.0002,
      "step": 307
    },
    {
      "epoch": 0.09013754755633596,
      "grad_norm": 0.7678189873695374,
      "learning_rate": 4.77465613110916e-05,
      "loss": 0.0024,
      "step": 308
    },
    {
      "epoch": 0.09043020193151888,
      "grad_norm": 0.003530798014253378,
      "learning_rate": 4.773924495171203e-05,
      "loss": 0.0001,
      "step": 309
    },
    {
      "epoch": 0.09072285630670178,
      "grad_norm": 0.10175793617963791,
      "learning_rate": 4.773192859233246e-05,
      "loss": 0.0005,
      "step": 310
    },
    {
      "epoch": 0.0910155106818847,
      "grad_norm": 0.005056069698184729,
      "learning_rate": 4.7724612232952885e-05,
      "loss": 0.0001,
      "step": 311
    },
    {
      "epoch": 0.0913081650570676,
      "grad_norm": 0.006732292473316193,
      "learning_rate": 4.771729587357331e-05,
      "loss": 0.0001,
      "step": 312
    },
    {
      "epoch": 0.09160081943225051,
      "grad_norm": 0.005457394290715456,
      "learning_rate": 4.770997951419374e-05,
      "loss": 0.0001,
      "step": 313
    },
    {
      "epoch": 0.09189347380743342,
      "grad_norm": 0.04522399604320526,
      "learning_rate": 4.770266315481416e-05,
      "loss": 0.0003,
      "step": 314
    },
    {
      "epoch": 0.09218612818261633,
      "grad_norm": 0.007834223099052906,
      "learning_rate": 4.769534679543459e-05,
      "loss": 0.0002,
      "step": 315
    },
    {
      "epoch": 0.09247878255779923,
      "grad_norm": 0.01890292391180992,
      "learning_rate": 4.768803043605502e-05,
      "loss": 0.0003,
      "step": 316
    },
    {
      "epoch": 0.09277143693298215,
      "grad_norm": 0.016568325459957123,
      "learning_rate": 4.7680714076675446e-05,
      "loss": 0.0002,
      "step": 317
    },
    {
      "epoch": 0.09306409130816505,
      "grad_norm": 0.08477871865034103,
      "learning_rate": 4.7673397717295874e-05,
      "loss": 0.0004,
      "step": 318
    },
    {
      "epoch": 0.09335674568334797,
      "grad_norm": 0.012408802285790443,
      "learning_rate": 4.76660813579163e-05,
      "loss": 0.0002,
      "step": 319
    },
    {
      "epoch": 0.09364940005853087,
      "grad_norm": 0.061891306191682816,
      "learning_rate": 4.765876499853673e-05,
      "loss": 0.0003,
      "step": 320
    },
    {
      "epoch": 0.09394205443371378,
      "grad_norm": 4.866892337799072,
      "learning_rate": 4.765144863915716e-05,
      "loss": 0.248,
      "step": 321
    },
    {
      "epoch": 0.09423470880889669,
      "grad_norm": 4.284445762634277,
      "learning_rate": 4.7644132279777586e-05,
      "loss": 0.0087,
      "step": 322
    },
    {
      "epoch": 0.0945273631840796,
      "grad_norm": 0.009381483308970928,
      "learning_rate": 4.7636815920398013e-05,
      "loss": 0.0002,
      "step": 323
    },
    {
      "epoch": 0.09482001755926252,
      "grad_norm": 13.310781478881836,
      "learning_rate": 4.7629499561018435e-05,
      "loss": 0.1202,
      "step": 324
    },
    {
      "epoch": 0.09511267193444542,
      "grad_norm": 23.32094383239746,
      "learning_rate": 4.762218320163886e-05,
      "loss": 0.1079,
      "step": 325
    },
    {
      "epoch": 0.09540532630962834,
      "grad_norm": 0.003830630797892809,
      "learning_rate": 4.761486684225929e-05,
      "loss": 0.0001,
      "step": 326
    },
    {
      "epoch": 0.09569798068481124,
      "grad_norm": 0.002424234990030527,
      "learning_rate": 4.760755048287972e-05,
      "loss": 0.0001,
      "step": 327
    },
    {
      "epoch": 0.09599063505999415,
      "grad_norm": 0.23865213990211487,
      "learning_rate": 4.7600234123500146e-05,
      "loss": 0.0009,
      "step": 328
    },
    {
      "epoch": 0.09628328943517706,
      "grad_norm": 0.004662353079766035,
      "learning_rate": 4.7592917764120574e-05,
      "loss": 0.0001,
      "step": 329
    },
    {
      "epoch": 0.09657594381035997,
      "grad_norm": 0.05005013570189476,
      "learning_rate": 4.7585601404741e-05,
      "loss": 0.0004,
      "step": 330
    },
    {
      "epoch": 0.09686859818554287,
      "grad_norm": 0.5038985013961792,
      "learning_rate": 4.757828504536143e-05,
      "loss": 0.0027,
      "step": 331
    },
    {
      "epoch": 0.09716125256072579,
      "grad_norm": 0.2572520077228546,
      "learning_rate": 4.757096868598186e-05,
      "loss": 0.001,
      "step": 332
    },
    {
      "epoch": 0.09745390693590869,
      "grad_norm": 3.4524786472320557,
      "learning_rate": 4.7563652326602286e-05,
      "loss": 0.0173,
      "step": 333
    },
    {
      "epoch": 0.0977465613110916,
      "grad_norm": 12.419319152832031,
      "learning_rate": 4.7556335967222714e-05,
      "loss": 0.1363,
      "step": 334
    },
    {
      "epoch": 0.09803921568627451,
      "grad_norm": 2.271554470062256,
      "learning_rate": 4.7549019607843135e-05,
      "loss": 0.0058,
      "step": 335
    },
    {
      "epoch": 0.09833187006145742,
      "grad_norm": 14.14339542388916,
      "learning_rate": 4.754170324846356e-05,
      "loss": 0.0725,
      "step": 336
    },
    {
      "epoch": 0.09862452443664033,
      "grad_norm": 5.047306537628174,
      "learning_rate": 4.753438688908399e-05,
      "loss": 0.0749,
      "step": 337
    },
    {
      "epoch": 0.09891717881182324,
      "grad_norm": 4.678935527801514,
      "learning_rate": 4.752707052970442e-05,
      "loss": 0.0092,
      "step": 338
    },
    {
      "epoch": 0.09920983318700614,
      "grad_norm": 0.040029630064964294,
      "learning_rate": 4.751975417032485e-05,
      "loss": 0.0005,
      "step": 339
    },
    {
      "epoch": 0.09950248756218906,
      "grad_norm": 0.9745525121688843,
      "learning_rate": 4.7512437810945275e-05,
      "loss": 0.0035,
      "step": 340
    },
    {
      "epoch": 0.09979514193737196,
      "grad_norm": 0.2308918535709381,
      "learning_rate": 4.75051214515657e-05,
      "loss": 0.0011,
      "step": 341
    },
    {
      "epoch": 0.10008779631255488,
      "grad_norm": 0.024273114278912544,
      "learning_rate": 4.749780509218613e-05,
      "loss": 0.0003,
      "step": 342
    },
    {
      "epoch": 0.10038045068773778,
      "grad_norm": 0.14329658448696136,
      "learning_rate": 4.749048873280656e-05,
      "loss": 0.0008,
      "step": 343
    },
    {
      "epoch": 0.1006731050629207,
      "grad_norm": 0.5504376292228699,
      "learning_rate": 4.748317237342699e-05,
      "loss": 0.0023,
      "step": 344
    },
    {
      "epoch": 0.1009657594381036,
      "grad_norm": 0.07725141942501068,
      "learning_rate": 4.747585601404741e-05,
      "loss": 0.0004,
      "step": 345
    },
    {
      "epoch": 0.10125841381328651,
      "grad_norm": 0.004685727413743734,
      "learning_rate": 4.7468539654667836e-05,
      "loss": 0.0001,
      "step": 346
    },
    {
      "epoch": 0.10155106818846941,
      "grad_norm": 5.258609771728516,
      "learning_rate": 4.7461223295288264e-05,
      "loss": 0.0163,
      "step": 347
    },
    {
      "epoch": 0.10184372256365233,
      "grad_norm": 0.005556050688028336,
      "learning_rate": 4.745390693590869e-05,
      "loss": 0.0001,
      "step": 348
    },
    {
      "epoch": 0.10213637693883523,
      "grad_norm": 0.02145569585263729,
      "learning_rate": 4.744659057652912e-05,
      "loss": 0.0002,
      "step": 349
    },
    {
      "epoch": 0.10242903131401815,
      "grad_norm": 0.01085591223090887,
      "learning_rate": 4.743927421714955e-05,
      "loss": 0.0001,
      "step": 350
    },
    {
      "epoch": 0.10272168568920105,
      "grad_norm": 0.05303160846233368,
      "learning_rate": 4.7431957857769976e-05,
      "loss": 0.0005,
      "step": 351
    },
    {
      "epoch": 0.10301434006438397,
      "grad_norm": 8.427594184875488,
      "learning_rate": 4.7424641498390404e-05,
      "loss": 0.1494,
      "step": 352
    },
    {
      "epoch": 0.10330699443956687,
      "grad_norm": 0.0912499874830246,
      "learning_rate": 4.741732513901083e-05,
      "loss": 0.0007,
      "step": 353
    },
    {
      "epoch": 0.10359964881474978,
      "grad_norm": 0.004164704121649265,
      "learning_rate": 4.741000877963126e-05,
      "loss": 0.0001,
      "step": 354
    },
    {
      "epoch": 0.10389230318993269,
      "grad_norm": 0.5572128295898438,
      "learning_rate": 4.740269242025169e-05,
      "loss": 0.0034,
      "step": 355
    },
    {
      "epoch": 0.1041849575651156,
      "grad_norm": 8.62136459350586,
      "learning_rate": 4.739537606087211e-05,
      "loss": 0.0587,
      "step": 356
    },
    {
      "epoch": 0.1044776119402985,
      "grad_norm": 2.725903272628784,
      "learning_rate": 4.738805970149254e-05,
      "loss": 0.0086,
      "step": 357
    },
    {
      "epoch": 0.10477026631548142,
      "grad_norm": 0.0027688341215252876,
      "learning_rate": 4.7380743342112965e-05,
      "loss": 0.0001,
      "step": 358
    },
    {
      "epoch": 0.10506292069066432,
      "grad_norm": 1.1569620370864868,
      "learning_rate": 4.737342698273339e-05,
      "loss": 0.0032,
      "step": 359
    },
    {
      "epoch": 0.10535557506584724,
      "grad_norm": 0.006273103877902031,
      "learning_rate": 4.736611062335382e-05,
      "loss": 0.0001,
      "step": 360
    },
    {
      "epoch": 0.10564822944103014,
      "grad_norm": 13.408610343933105,
      "learning_rate": 4.735879426397425e-05,
      "loss": 0.2888,
      "step": 361
    },
    {
      "epoch": 0.10594088381621306,
      "grad_norm": 4.661241054534912,
      "learning_rate": 4.7351477904594676e-05,
      "loss": 0.2044,
      "step": 362
    },
    {
      "epoch": 0.10623353819139596,
      "grad_norm": 6.516336441040039,
      "learning_rate": 4.7344161545215104e-05,
      "loss": 0.1134,
      "step": 363
    },
    {
      "epoch": 0.10652619256657887,
      "grad_norm": 0.00893083680421114,
      "learning_rate": 4.733684518583553e-05,
      "loss": 0.0002,
      "step": 364
    },
    {
      "epoch": 0.10681884694176177,
      "grad_norm": 0.010455523617565632,
      "learning_rate": 4.732952882645596e-05,
      "loss": 0.0002,
      "step": 365
    },
    {
      "epoch": 0.10711150131694469,
      "grad_norm": 6.539015293121338,
      "learning_rate": 4.732221246707639e-05,
      "loss": 0.0543,
      "step": 366
    },
    {
      "epoch": 0.10740415569212759,
      "grad_norm": 0.01176715549081564,
      "learning_rate": 4.731489610769681e-05,
      "loss": 0.0002,
      "step": 367
    },
    {
      "epoch": 0.10769681006731051,
      "grad_norm": 4.113800048828125,
      "learning_rate": 4.730757974831724e-05,
      "loss": 0.0278,
      "step": 368
    },
    {
      "epoch": 0.10798946444249341,
      "grad_norm": 11.07836627960205,
      "learning_rate": 4.7300263388937665e-05,
      "loss": 0.0611,
      "step": 369
    },
    {
      "epoch": 0.10828211881767633,
      "grad_norm": 0.5808396339416504,
      "learning_rate": 4.729294702955809e-05,
      "loss": 0.0023,
      "step": 370
    },
    {
      "epoch": 0.10857477319285923,
      "grad_norm": 0.02736765518784523,
      "learning_rate": 4.728563067017852e-05,
      "loss": 0.0003,
      "step": 371
    },
    {
      "epoch": 0.10886742756804214,
      "grad_norm": 6.302504539489746,
      "learning_rate": 4.727831431079895e-05,
      "loss": 0.1156,
      "step": 372
    },
    {
      "epoch": 0.10916008194322505,
      "grad_norm": 0.10329131036996841,
      "learning_rate": 4.727099795141938e-05,
      "loss": 0.0019,
      "step": 373
    },
    {
      "epoch": 0.10945273631840796,
      "grad_norm": 0.28105202317237854,
      "learning_rate": 4.7263681592039805e-05,
      "loss": 0.0041,
      "step": 374
    },
    {
      "epoch": 0.10974539069359086,
      "grad_norm": 0.2540721893310547,
      "learning_rate": 4.725636523266023e-05,
      "loss": 0.0038,
      "step": 375
    },
    {
      "epoch": 0.11003804506877378,
      "grad_norm": 0.22207878530025482,
      "learning_rate": 4.724904887328066e-05,
      "loss": 0.0015,
      "step": 376
    },
    {
      "epoch": 0.11033069944395668,
      "grad_norm": 3.857038736343384,
      "learning_rate": 4.724173251390108e-05,
      "loss": 0.086,
      "step": 377
    },
    {
      "epoch": 0.1106233538191396,
      "grad_norm": 2.7076306343078613,
      "learning_rate": 4.723441615452151e-05,
      "loss": 0.0153,
      "step": 378
    },
    {
      "epoch": 0.1109160081943225,
      "grad_norm": 0.03701874241232872,
      "learning_rate": 4.722709979514194e-05,
      "loss": 0.0007,
      "step": 379
    },
    {
      "epoch": 0.11120866256950541,
      "grad_norm": 0.05244317650794983,
      "learning_rate": 4.7219783435762366e-05,
      "loss": 0.0007,
      "step": 380
    },
    {
      "epoch": 0.11150131694468832,
      "grad_norm": 1.131405234336853,
      "learning_rate": 4.7212467076382794e-05,
      "loss": 0.0073,
      "step": 381
    },
    {
      "epoch": 0.11179397131987123,
      "grad_norm": 0.539952278137207,
      "learning_rate": 4.720515071700322e-05,
      "loss": 0.0044,
      "step": 382
    },
    {
      "epoch": 0.11208662569505413,
      "grad_norm": 0.02004413679242134,
      "learning_rate": 4.719783435762365e-05,
      "loss": 0.0004,
      "step": 383
    },
    {
      "epoch": 0.11237928007023705,
      "grad_norm": 0.018577802926301956,
      "learning_rate": 4.719051799824408e-05,
      "loss": 0.0002,
      "step": 384
    },
    {
      "epoch": 0.11267193444541995,
      "grad_norm": 0.09130682796239853,
      "learning_rate": 4.7183201638864506e-05,
      "loss": 0.0005,
      "step": 385
    },
    {
      "epoch": 0.11296458882060287,
      "grad_norm": 4.0011820793151855,
      "learning_rate": 4.7175885279484934e-05,
      "loss": 0.2058,
      "step": 386
    },
    {
      "epoch": 0.11325724319578578,
      "grad_norm": 7.127885818481445,
      "learning_rate": 4.716856892010536e-05,
      "loss": 0.0478,
      "step": 387
    },
    {
      "epoch": 0.11354989757096869,
      "grad_norm": 10.796640396118164,
      "learning_rate": 4.716125256072578e-05,
      "loss": 0.0723,
      "step": 388
    },
    {
      "epoch": 0.1138425519461516,
      "grad_norm": 0.36668506264686584,
      "learning_rate": 4.715393620134621e-05,
      "loss": 0.0016,
      "step": 389
    },
    {
      "epoch": 0.1141352063213345,
      "grad_norm": 3.7605903148651123,
      "learning_rate": 4.714661984196664e-05,
      "loss": 0.0238,
      "step": 390
    },
    {
      "epoch": 0.11442786069651742,
      "grad_norm": 8.716503143310547,
      "learning_rate": 4.713930348258707e-05,
      "loss": 0.1373,
      "step": 391
    },
    {
      "epoch": 0.11472051507170032,
      "grad_norm": 0.05329298600554466,
      "learning_rate": 4.7131987123207495e-05,
      "loss": 0.0004,
      "step": 392
    },
    {
      "epoch": 0.11501316944688324,
      "grad_norm": 6.454421043395996,
      "learning_rate": 4.712467076382792e-05,
      "loss": 0.0283,
      "step": 393
    },
    {
      "epoch": 0.11530582382206614,
      "grad_norm": 12.48361587524414,
      "learning_rate": 4.711735440444835e-05,
      "loss": 0.2669,
      "step": 394
    },
    {
      "epoch": 0.11559847819724905,
      "grad_norm": 2.376858949661255,
      "learning_rate": 4.711003804506878e-05,
      "loss": 0.2375,
      "step": 395
    },
    {
      "epoch": 0.11589113257243196,
      "grad_norm": 0.006335779093205929,
      "learning_rate": 4.7102721685689206e-05,
      "loss": 0.0001,
      "step": 396
    },
    {
      "epoch": 0.11618378694761487,
      "grad_norm": 0.9085770845413208,
      "learning_rate": 4.7095405326309634e-05,
      "loss": 0.0046,
      "step": 397
    },
    {
      "epoch": 0.11647644132279777,
      "grad_norm": 2.1395437717437744,
      "learning_rate": 4.7088088966930056e-05,
      "loss": 0.0096,
      "step": 398
    },
    {
      "epoch": 0.11676909569798069,
      "grad_norm": 1.47907292842865,
      "learning_rate": 4.7080772607550484e-05,
      "loss": 0.0114,
      "step": 399
    },
    {
      "epoch": 0.11706175007316359,
      "grad_norm": 6.841465473175049,
      "learning_rate": 4.707345624817091e-05,
      "loss": 0.0373,
      "step": 400
    },
    {
      "epoch": 0.11735440444834651,
      "grad_norm": 8.371074676513672,
      "learning_rate": 4.706613988879134e-05,
      "loss": 0.0509,
      "step": 401
    },
    {
      "epoch": 0.11764705882352941,
      "grad_norm": 0.017969602718949318,
      "learning_rate": 4.705882352941177e-05,
      "loss": 0.0004,
      "step": 402
    },
    {
      "epoch": 0.11793971319871233,
      "grad_norm": 0.12363521754741669,
      "learning_rate": 4.7051507170032195e-05,
      "loss": 0.0009,
      "step": 403
    },
    {
      "epoch": 0.11823236757389523,
      "grad_norm": 0.045213498175144196,
      "learning_rate": 4.704419081065262e-05,
      "loss": 0.001,
      "step": 404
    },
    {
      "epoch": 0.11852502194907814,
      "grad_norm": 3.5112273693084717,
      "learning_rate": 4.703687445127305e-05,
      "loss": 0.0107,
      "step": 405
    },
    {
      "epoch": 0.11881767632426105,
      "grad_norm": 0.22896206378936768,
      "learning_rate": 4.702955809189348e-05,
      "loss": 0.0018,
      "step": 406
    },
    {
      "epoch": 0.11911033069944396,
      "grad_norm": 2.320936441421509,
      "learning_rate": 4.702224173251391e-05,
      "loss": 0.0115,
      "step": 407
    },
    {
      "epoch": 0.11940298507462686,
      "grad_norm": 0.11919834464788437,
      "learning_rate": 4.7014925373134335e-05,
      "loss": 0.0014,
      "step": 408
    },
    {
      "epoch": 0.11969563944980978,
      "grad_norm": 0.05550792068243027,
      "learning_rate": 4.7007609013754756e-05,
      "loss": 0.0009,
      "step": 409
    },
    {
      "epoch": 0.11998829382499268,
      "grad_norm": 20.043973922729492,
      "learning_rate": 4.7000292654375184e-05,
      "loss": 0.0925,
      "step": 410
    },
    {
      "epoch": 0.1202809482001756,
      "grad_norm": 0.01288844645023346,
      "learning_rate": 4.699297629499561e-05,
      "loss": 0.0003,
      "step": 411
    },
    {
      "epoch": 0.1205736025753585,
      "grad_norm": 0.09516994655132294,
      "learning_rate": 4.698565993561604e-05,
      "loss": 0.0012,
      "step": 412
    },
    {
      "epoch": 0.12086625695054141,
      "grad_norm": 0.1977280080318451,
      "learning_rate": 4.697834357623647e-05,
      "loss": 0.0019,
      "step": 413
    },
    {
      "epoch": 0.12115891132572432,
      "grad_norm": 0.018458962440490723,
      "learning_rate": 4.6971027216856896e-05,
      "loss": 0.0003,
      "step": 414
    },
    {
      "epoch": 0.12145156570090723,
      "grad_norm": 0.01741596683859825,
      "learning_rate": 4.6963710857477324e-05,
      "loss": 0.0003,
      "step": 415
    },
    {
      "epoch": 0.12174422007609013,
      "grad_norm": 17.843650817871094,
      "learning_rate": 4.695639449809775e-05,
      "loss": 0.6303,
      "step": 416
    },
    {
      "epoch": 0.12203687445127305,
      "grad_norm": 1.637863278388977,
      "learning_rate": 4.694907813871818e-05,
      "loss": 0.0045,
      "step": 417
    },
    {
      "epoch": 0.12232952882645595,
      "grad_norm": 8.336296081542969,
      "learning_rate": 4.694176177933861e-05,
      "loss": 0.0237,
      "step": 418
    },
    {
      "epoch": 0.12262218320163887,
      "grad_norm": 0.021015867590904236,
      "learning_rate": 4.6934445419959036e-05,
      "loss": 0.0003,
      "step": 419
    },
    {
      "epoch": 0.12291483757682177,
      "grad_norm": 0.005957255605608225,
      "learning_rate": 4.692712906057946e-05,
      "loss": 0.0002,
      "step": 420
    },
    {
      "epoch": 0.12320749195200469,
      "grad_norm": 0.5390514135360718,
      "learning_rate": 4.6919812701199885e-05,
      "loss": 0.003,
      "step": 421
    },
    {
      "epoch": 0.12350014632718759,
      "grad_norm": 0.0064792693592607975,
      "learning_rate": 4.691249634182031e-05,
      "loss": 0.0002,
      "step": 422
    },
    {
      "epoch": 0.1237928007023705,
      "grad_norm": 0.06832041591405869,
      "learning_rate": 4.690517998244074e-05,
      "loss": 0.0006,
      "step": 423
    },
    {
      "epoch": 0.1240854550775534,
      "grad_norm": 13.302190780639648,
      "learning_rate": 4.689786362306117e-05,
      "loss": 0.6247,
      "step": 424
    },
    {
      "epoch": 0.12437810945273632,
      "grad_norm": 3.1761767864227295,
      "learning_rate": 4.68905472636816e-05,
      "loss": 0.2245,
      "step": 425
    },
    {
      "epoch": 0.12467076382791922,
      "grad_norm": 0.006169583182781935,
      "learning_rate": 4.6883230904302025e-05,
      "loss": 0.0001,
      "step": 426
    },
    {
      "epoch": 0.12496341820310214,
      "grad_norm": 7.2902445793151855,
      "learning_rate": 4.687591454492245e-05,
      "loss": 0.1874,
      "step": 427
    },
    {
      "epoch": 0.12525607257828505,
      "grad_norm": 0.026300964877009392,
      "learning_rate": 4.686859818554288e-05,
      "loss": 0.0005,
      "step": 428
    },
    {
      "epoch": 0.12554872695346794,
      "grad_norm": 0.057506389915943146,
      "learning_rate": 4.686128182616331e-05,
      "loss": 0.0012,
      "step": 429
    },
    {
      "epoch": 0.12584138132865086,
      "grad_norm": 0.14768287539482117,
      "learning_rate": 4.685396546678373e-05,
      "loss": 0.0017,
      "step": 430
    },
    {
      "epoch": 0.12613403570383377,
      "grad_norm": 3.4165399074554443,
      "learning_rate": 4.684664910740416e-05,
      "loss": 0.2147,
      "step": 431
    },
    {
      "epoch": 0.1264266900790167,
      "grad_norm": 1.6047797203063965,
      "learning_rate": 4.6839332748024586e-05,
      "loss": 0.0122,
      "step": 432
    },
    {
      "epoch": 0.12671934445419958,
      "grad_norm": 0.22430621087551117,
      "learning_rate": 4.6832016388645013e-05,
      "loss": 0.0042,
      "step": 433
    },
    {
      "epoch": 0.1270119988293825,
      "grad_norm": 10.875073432922363,
      "learning_rate": 4.682470002926544e-05,
      "loss": 0.1259,
      "step": 434
    },
    {
      "epoch": 0.1273046532045654,
      "grad_norm": 1.0021528005599976,
      "learning_rate": 4.681738366988587e-05,
      "loss": 0.0045,
      "step": 435
    },
    {
      "epoch": 0.12759730757974833,
      "grad_norm": 6.521137714385986,
      "learning_rate": 4.68100673105063e-05,
      "loss": 0.0449,
      "step": 436
    },
    {
      "epoch": 0.12788996195493121,
      "grad_norm": 0.07355611026287079,
      "learning_rate": 4.6802750951126725e-05,
      "loss": 0.0016,
      "step": 437
    },
    {
      "epoch": 0.12818261633011413,
      "grad_norm": 0.04785890877246857,
      "learning_rate": 4.679543459174715e-05,
      "loss": 0.0013,
      "step": 438
    },
    {
      "epoch": 0.12847527070529705,
      "grad_norm": 0.0513986274600029,
      "learning_rate": 4.678811823236758e-05,
      "loss": 0.0014,
      "step": 439
    },
    {
      "epoch": 0.12876792508047996,
      "grad_norm": 0.8326976895332336,
      "learning_rate": 4.678080187298801e-05,
      "loss": 0.0052,
      "step": 440
    },
    {
      "epoch": 0.12906057945566285,
      "grad_norm": 0.026337653398513794,
      "learning_rate": 4.677348551360843e-05,
      "loss": 0.0006,
      "step": 441
    },
    {
      "epoch": 0.12935323383084577,
      "grad_norm": 0.03743434324860573,
      "learning_rate": 4.676616915422886e-05,
      "loss": 0.0008,
      "step": 442
    },
    {
      "epoch": 0.12964588820602868,
      "grad_norm": 2.389418125152588,
      "learning_rate": 4.6758852794849286e-05,
      "loss": 0.0072,
      "step": 443
    },
    {
      "epoch": 0.1299385425812116,
      "grad_norm": 5.299021244049072,
      "learning_rate": 4.6751536435469714e-05,
      "loss": 0.2451,
      "step": 444
    },
    {
      "epoch": 0.13023119695639448,
      "grad_norm": 0.017874106764793396,
      "learning_rate": 4.674422007609014e-05,
      "loss": 0.0005,
      "step": 445
    },
    {
      "epoch": 0.1305238513315774,
      "grad_norm": 4.864397048950195,
      "learning_rate": 4.673690371671057e-05,
      "loss": 0.0796,
      "step": 446
    },
    {
      "epoch": 0.13081650570676032,
      "grad_norm": 0.06551877409219742,
      "learning_rate": 4.6729587357331e-05,
      "loss": 0.0012,
      "step": 447
    },
    {
      "epoch": 0.13110916008194323,
      "grad_norm": 0.039231959730386734,
      "learning_rate": 4.6722270997951426e-05,
      "loss": 0.0008,
      "step": 448
    },
    {
      "epoch": 0.13140181445712612,
      "grad_norm": 3.826850175857544,
      "learning_rate": 4.6714954638571854e-05,
      "loss": 0.0273,
      "step": 449
    },
    {
      "epoch": 0.13169446883230904,
      "grad_norm": 0.12824492156505585,
      "learning_rate": 4.6707638279192275e-05,
      "loss": 0.0018,
      "step": 450
    },
    {
      "epoch": 0.13198712320749195,
      "grad_norm": 3.2820913791656494,
      "learning_rate": 4.67003219198127e-05,
      "loss": 0.0184,
      "step": 451
    },
    {
      "epoch": 0.13227977758267487,
      "grad_norm": 0.05461831018328667,
      "learning_rate": 4.669300556043313e-05,
      "loss": 0.0009,
      "step": 452
    },
    {
      "epoch": 0.13257243195785778,
      "grad_norm": 0.10601229220628738,
      "learning_rate": 4.668568920105356e-05,
      "loss": 0.0014,
      "step": 453
    },
    {
      "epoch": 0.13286508633304067,
      "grad_norm": 0.0834844782948494,
      "learning_rate": 4.667837284167399e-05,
      "loss": 0.0013,
      "step": 454
    },
    {
      "epoch": 0.1331577407082236,
      "grad_norm": 3.3121674060821533,
      "learning_rate": 4.6671056482294415e-05,
      "loss": 0.1301,
      "step": 455
    },
    {
      "epoch": 0.1334503950834065,
      "grad_norm": 1.9951350688934326,
      "learning_rate": 4.666374012291484e-05,
      "loss": 0.013,
      "step": 456
    },
    {
      "epoch": 0.13374304945858942,
      "grad_norm": 4.8048930168151855,
      "learning_rate": 4.665642376353527e-05,
      "loss": 0.3001,
      "step": 457
    },
    {
      "epoch": 0.1340357038337723,
      "grad_norm": 0.02953229285776615,
      "learning_rate": 4.664910740415569e-05,
      "loss": 0.0007,
      "step": 458
    },
    {
      "epoch": 0.13432835820895522,
      "grad_norm": 1.4468019008636475,
      "learning_rate": 4.664179104477612e-05,
      "loss": 0.0078,
      "step": 459
    },
    {
      "epoch": 0.13462101258413814,
      "grad_norm": 0.08909465372562408,
      "learning_rate": 4.663447468539655e-05,
      "loss": 0.0014,
      "step": 460
    },
    {
      "epoch": 0.13491366695932105,
      "grad_norm": 0.17444898188114166,
      "learning_rate": 4.6627158326016976e-05,
      "loss": 0.0018,
      "step": 461
    },
    {
      "epoch": 0.13520632133450394,
      "grad_norm": 5.046396732330322,
      "learning_rate": 4.6619841966637404e-05,
      "loss": 0.025,
      "step": 462
    },
    {
      "epoch": 0.13549897570968686,
      "grad_norm": 0.37730321288108826,
      "learning_rate": 4.661252560725783e-05,
      "loss": 0.0031,
      "step": 463
    },
    {
      "epoch": 0.13579163008486977,
      "grad_norm": 10.075611114501953,
      "learning_rate": 4.660520924787826e-05,
      "loss": 0.106,
      "step": 464
    },
    {
      "epoch": 0.1360842844600527,
      "grad_norm": 0.08799745887517929,
      "learning_rate": 4.659789288849869e-05,
      "loss": 0.0012,
      "step": 465
    },
    {
      "epoch": 0.13637693883523558,
      "grad_norm": 0.4172189235687256,
      "learning_rate": 4.659057652911911e-05,
      "loss": 0.0036,
      "step": 466
    },
    {
      "epoch": 0.1366695932104185,
      "grad_norm": 0.1687348634004593,
      "learning_rate": 4.658326016973954e-05,
      "loss": 0.0019,
      "step": 467
    },
    {
      "epoch": 0.1369622475856014,
      "grad_norm": 0.40193888545036316,
      "learning_rate": 4.6575943810359965e-05,
      "loss": 0.0033,
      "step": 468
    },
    {
      "epoch": 0.13725490196078433,
      "grad_norm": 0.05767688527703285,
      "learning_rate": 4.656862745098039e-05,
      "loss": 0.0009,
      "step": 469
    },
    {
      "epoch": 0.1375475563359672,
      "grad_norm": 4.644579887390137,
      "learning_rate": 4.656131109160082e-05,
      "loss": 0.0194,
      "step": 470
    },
    {
      "epoch": 0.13784021071115013,
      "grad_norm": 0.018554113805294037,
      "learning_rate": 4.655399473222125e-05,
      "loss": 0.0004,
      "step": 471
    },
    {
      "epoch": 0.13813286508633305,
      "grad_norm": 0.3066471219062805,
      "learning_rate": 4.6546678372841676e-05,
      "loss": 0.0016,
      "step": 472
    },
    {
      "epoch": 0.13842551946151596,
      "grad_norm": 4.61218786239624,
      "learning_rate": 4.6539362013462104e-05,
      "loss": 0.0225,
      "step": 473
    },
    {
      "epoch": 0.13871817383669885,
      "grad_norm": 1.0650144815444946,
      "learning_rate": 4.6532045654082526e-05,
      "loss": 0.0039,
      "step": 474
    },
    {
      "epoch": 0.13901082821188177,
      "grad_norm": 0.009785358794033527,
      "learning_rate": 4.6524729294702954e-05,
      "loss": 0.0002,
      "step": 475
    },
    {
      "epoch": 0.13930348258706468,
      "grad_norm": 1.6635420322418213,
      "learning_rate": 4.651741293532338e-05,
      "loss": 0.0076,
      "step": 476
    },
    {
      "epoch": 0.1395961369622476,
      "grad_norm": 3.513559341430664,
      "learning_rate": 4.651009657594381e-05,
      "loss": 0.0048,
      "step": 477
    },
    {
      "epoch": 0.13988879133743048,
      "grad_norm": 7.961835861206055,
      "learning_rate": 4.650278021656424e-05,
      "loss": 0.1191,
      "step": 478
    },
    {
      "epoch": 0.1401814457126134,
      "grad_norm": 0.03598767891526222,
      "learning_rate": 4.6495463857184665e-05,
      "loss": 0.0003,
      "step": 479
    },
    {
      "epoch": 0.14047410008779632,
      "grad_norm": 14.998804092407227,
      "learning_rate": 4.648814749780509e-05,
      "loss": 0.0693,
      "step": 480
    },
    {
      "epoch": 0.14076675446297923,
      "grad_norm": 0.04784742742776871,
      "learning_rate": 4.648083113842552e-05,
      "loss": 0.0004,
      "step": 481
    },
    {
      "epoch": 0.14105940883816212,
      "grad_norm": 10.379205703735352,
      "learning_rate": 4.647351477904595e-05,
      "loss": 0.2895,
      "step": 482
    },
    {
      "epoch": 0.14135206321334504,
      "grad_norm": 0.007849453948438168,
      "learning_rate": 4.646619841966637e-05,
      "loss": 0.0001,
      "step": 483
    },
    {
      "epoch": 0.14164471758852795,
      "grad_norm": 0.429504930973053,
      "learning_rate": 4.64588820602868e-05,
      "loss": 0.0019,
      "step": 484
    },
    {
      "epoch": 0.14193737196371087,
      "grad_norm": 4.247195243835449,
      "learning_rate": 4.6451565700907226e-05,
      "loss": 0.2315,
      "step": 485
    },
    {
      "epoch": 0.14223002633889376,
      "grad_norm": 0.00903258752077818,
      "learning_rate": 4.6444249341527654e-05,
      "loss": 0.0002,
      "step": 486
    },
    {
      "epoch": 0.14252268071407667,
      "grad_norm": 0.26237669587135315,
      "learning_rate": 4.643693298214808e-05,
      "loss": 0.0017,
      "step": 487
    },
    {
      "epoch": 0.1428153350892596,
      "grad_norm": 0.1016833707690239,
      "learning_rate": 4.642961662276851e-05,
      "loss": 0.0005,
      "step": 488
    },
    {
      "epoch": 0.1431079894644425,
      "grad_norm": 2.977407217025757,
      "learning_rate": 4.642230026338894e-05,
      "loss": 0.0126,
      "step": 489
    },
    {
      "epoch": 0.1434006438396254,
      "grad_norm": 2.0009970664978027,
      "learning_rate": 4.6414983904009366e-05,
      "loss": 0.3651,
      "step": 490
    },
    {
      "epoch": 0.1436932982148083,
      "grad_norm": 0.0034312792122364044,
      "learning_rate": 4.6407667544629794e-05,
      "loss": 0.0001,
      "step": 491
    },
    {
      "epoch": 0.14398595258999122,
      "grad_norm": 0.011009294539690018,
      "learning_rate": 4.640035118525022e-05,
      "loss": 0.0003,
      "step": 492
    },
    {
      "epoch": 0.14427860696517414,
      "grad_norm": 11.964802742004395,
      "learning_rate": 4.639303482587065e-05,
      "loss": 0.0375,
      "step": 493
    },
    {
      "epoch": 0.14457126134035703,
      "grad_norm": 0.011789514683187008,
      "learning_rate": 4.638571846649107e-05,
      "loss": 0.0003,
      "step": 494
    },
    {
      "epoch": 0.14486391571553994,
      "grad_norm": 12.071039199829102,
      "learning_rate": 4.63784021071115e-05,
      "loss": 0.052,
      "step": 495
    },
    {
      "epoch": 0.14515657009072286,
      "grad_norm": 0.056693222373723984,
      "learning_rate": 4.637108574773193e-05,
      "loss": 0.0011,
      "step": 496
    },
    {
      "epoch": 0.14544922446590577,
      "grad_norm": 0.6062968969345093,
      "learning_rate": 4.6363769388352355e-05,
      "loss": 0.0066,
      "step": 497
    },
    {
      "epoch": 0.14574187884108866,
      "grad_norm": 0.1888841986656189,
      "learning_rate": 4.635645302897278e-05,
      "loss": 0.0032,
      "step": 498
    },
    {
      "epoch": 0.14603453321627158,
      "grad_norm": 0.270308256149292,
      "learning_rate": 4.634913666959321e-05,
      "loss": 0.0046,
      "step": 499
    },
    {
      "epoch": 0.1463271875914545,
      "grad_norm": 13.553791999816895,
      "learning_rate": 4.634182031021364e-05,
      "loss": 0.3013,
      "step": 500
    },
    {
      "epoch": 0.1466198419666374,
      "grad_norm": 9.983979225158691,
      "learning_rate": 4.633450395083407e-05,
      "loss": 0.2404,
      "step": 501
    },
    {
      "epoch": 0.1469124963418203,
      "grad_norm": 0.04803050309419632,
      "learning_rate": 4.6327187591454495e-05,
      "loss": 0.0011,
      "step": 502
    },
    {
      "epoch": 0.1472051507170032,
      "grad_norm": 7.278826713562012,
      "learning_rate": 4.631987123207492e-05,
      "loss": 0.0522,
      "step": 503
    },
    {
      "epoch": 0.14749780509218613,
      "grad_norm": 0.9605130553245544,
      "learning_rate": 4.631255487269535e-05,
      "loss": 0.0057,
      "step": 504
    },
    {
      "epoch": 0.14779045946736905,
      "grad_norm": 0.018609413877129555,
      "learning_rate": 4.630523851331577e-05,
      "loss": 0.0004,
      "step": 505
    },
    {
      "epoch": 0.14808311384255193,
      "grad_norm": 9.427624702453613,
      "learning_rate": 4.62979221539362e-05,
      "loss": 0.1103,
      "step": 506
    },
    {
      "epoch": 0.14837576821773485,
      "grad_norm": 9.950340270996094,
      "learning_rate": 4.629060579455663e-05,
      "loss": 0.1134,
      "step": 507
    },
    {
      "epoch": 0.14866842259291776,
      "grad_norm": 0.006434707902371883,
      "learning_rate": 4.6283289435177056e-05,
      "loss": 0.0001,
      "step": 508
    },
    {
      "epoch": 0.14896107696810068,
      "grad_norm": 7.503812313079834,
      "learning_rate": 4.6275973075797484e-05,
      "loss": 0.0604,
      "step": 509
    },
    {
      "epoch": 0.14925373134328357,
      "grad_norm": 0.28119999170303345,
      "learning_rate": 4.626865671641791e-05,
      "loss": 0.004,
      "step": 510
    },
    {
      "epoch": 0.14954638571846648,
      "grad_norm": 0.01554897427558899,
      "learning_rate": 4.626134035703834e-05,
      "loss": 0.0005,
      "step": 511
    },
    {
      "epoch": 0.1498390400936494,
      "grad_norm": 0.039249517023563385,
      "learning_rate": 4.625402399765877e-05,
      "loss": 0.0009,
      "step": 512
    },
    {
      "epoch": 0.15013169446883232,
      "grad_norm": 6.196067810058594,
      "learning_rate": 4.6246707638279195e-05,
      "loss": 0.0234,
      "step": 513
    },
    {
      "epoch": 0.1504243488440152,
      "grad_norm": 1.96540105342865,
      "learning_rate": 4.623939127889962e-05,
      "loss": 0.0051,
      "step": 514
    },
    {
      "epoch": 0.15071700321919812,
      "grad_norm": 0.01271872315555811,
      "learning_rate": 4.6232074919520044e-05,
      "loss": 0.0004,
      "step": 515
    },
    {
      "epoch": 0.15100965759438104,
      "grad_norm": 4.049911975860596,
      "learning_rate": 4.622475856014047e-05,
      "loss": 0.1171,
      "step": 516
    },
    {
      "epoch": 0.15130231196956395,
      "grad_norm": 0.03024102747440338,
      "learning_rate": 4.62174422007609e-05,
      "loss": 0.0008,
      "step": 517
    },
    {
      "epoch": 0.15159496634474687,
      "grad_norm": 0.016120215877890587,
      "learning_rate": 4.621012584138133e-05,
      "loss": 0.0002,
      "step": 518
    },
    {
      "epoch": 0.15188762071992976,
      "grad_norm": 0.02355908416211605,
      "learning_rate": 4.6202809482001756e-05,
      "loss": 0.0006,
      "step": 519
    },
    {
      "epoch": 0.15218027509511267,
      "grad_norm": 0.0034723831340670586,
      "learning_rate": 4.6195493122622184e-05,
      "loss": 0.0001,
      "step": 520
    },
    {
      "epoch": 0.1524729294702956,
      "grad_norm": 7.882876396179199,
      "learning_rate": 4.618817676324261e-05,
      "loss": 0.0366,
      "step": 521
    },
    {
      "epoch": 0.1527655838454785,
      "grad_norm": 0.08890720456838608,
      "learning_rate": 4.618086040386304e-05,
      "loss": 0.0015,
      "step": 522
    },
    {
      "epoch": 0.1530582382206614,
      "grad_norm": 0.041916172951459885,
      "learning_rate": 4.617354404448347e-05,
      "loss": 0.0009,
      "step": 523
    },
    {
      "epoch": 0.1533508925958443,
      "grad_norm": 0.06541915237903595,
      "learning_rate": 4.6166227685103896e-05,
      "loss": 0.0009,
      "step": 524
    },
    {
      "epoch": 0.15364354697102722,
      "grad_norm": 0.030568156391382217,
      "learning_rate": 4.6158911325724324e-05,
      "loss": 0.0007,
      "step": 525
    },
    {
      "epoch": 0.15393620134621014,
      "grad_norm": 0.005763526074588299,
      "learning_rate": 4.6151594966344745e-05,
      "loss": 0.0001,
      "step": 526
    },
    {
      "epoch": 0.15422885572139303,
      "grad_norm": 0.03486074134707451,
      "learning_rate": 4.614427860696517e-05,
      "loss": 0.0006,
      "step": 527
    },
    {
      "epoch": 0.15452151009657594,
      "grad_norm": 0.004300899337977171,
      "learning_rate": 4.61369622475856e-05,
      "loss": 0.0001,
      "step": 528
    },
    {
      "epoch": 0.15481416447175886,
      "grad_norm": 1.4314249753952026,
      "learning_rate": 4.612964588820603e-05,
      "loss": 0.0127,
      "step": 529
    },
    {
      "epoch": 0.15510681884694177,
      "grad_norm": 0.026534054428339005,
      "learning_rate": 4.612232952882646e-05,
      "loss": 0.0005,
      "step": 530
    },
    {
      "epoch": 0.15539947322212466,
      "grad_norm": 0.032224852591753006,
      "learning_rate": 4.6115013169446885e-05,
      "loss": 0.0004,
      "step": 531
    },
    {
      "epoch": 0.15569212759730758,
      "grad_norm": 9.825483322143555,
      "learning_rate": 4.610769681006731e-05,
      "loss": 0.0773,
      "step": 532
    },
    {
      "epoch": 0.1559847819724905,
      "grad_norm": 1.552971363067627,
      "learning_rate": 4.610038045068774e-05,
      "loss": 0.004,
      "step": 533
    },
    {
      "epoch": 0.1562774363476734,
      "grad_norm": 8.752983093261719,
      "learning_rate": 4.609306409130817e-05,
      "loss": 0.0626,
      "step": 534
    },
    {
      "epoch": 0.1565700907228563,
      "grad_norm": 0.9281281232833862,
      "learning_rate": 4.60857477319286e-05,
      "loss": 0.0047,
      "step": 535
    },
    {
      "epoch": 0.1568627450980392,
      "grad_norm": 5.323596000671387,
      "learning_rate": 4.607843137254902e-05,
      "loss": 0.1174,
      "step": 536
    },
    {
      "epoch": 0.15715539947322213,
      "grad_norm": 0.03563448414206505,
      "learning_rate": 4.6071115013169446e-05,
      "loss": 0.0003,
      "step": 537
    },
    {
      "epoch": 0.15744805384840505,
      "grad_norm": 0.016390468925237656,
      "learning_rate": 4.6063798653789874e-05,
      "loss": 0.0004,
      "step": 538
    },
    {
      "epoch": 0.15774070822358793,
      "grad_norm": 0.040976326912641525,
      "learning_rate": 4.60564822944103e-05,
      "loss": 0.0004,
      "step": 539
    },
    {
      "epoch": 0.15803336259877085,
      "grad_norm": 0.12268855422735214,
      "learning_rate": 4.604916593503073e-05,
      "loss": 0.0008,
      "step": 540
    },
    {
      "epoch": 0.15832601697395376,
      "grad_norm": 0.006396189332008362,
      "learning_rate": 4.604184957565116e-05,
      "loss": 0.0001,
      "step": 541
    },
    {
      "epoch": 0.15861867134913668,
      "grad_norm": 0.20329192280769348,
      "learning_rate": 4.6034533216271586e-05,
      "loss": 0.0012,
      "step": 542
    },
    {
      "epoch": 0.15891132572431957,
      "grad_norm": 6.874823093414307,
      "learning_rate": 4.6027216856892013e-05,
      "loss": 0.2398,
      "step": 543
    },
    {
      "epoch": 0.15920398009950248,
      "grad_norm": 0.5490859150886536,
      "learning_rate": 4.601990049751244e-05,
      "loss": 0.0039,
      "step": 544
    },
    {
      "epoch": 0.1594966344746854,
      "grad_norm": 0.047674331814050674,
      "learning_rate": 4.601258413813287e-05,
      "loss": 0.0007,
      "step": 545
    },
    {
      "epoch": 0.15978928884986832,
      "grad_norm": 4.832897186279297,
      "learning_rate": 4.60052677787533e-05,
      "loss": 0.2455,
      "step": 546
    },
    {
      "epoch": 0.1600819432250512,
      "grad_norm": 4.292545318603516,
      "learning_rate": 4.599795141937372e-05,
      "loss": 0.0733,
      "step": 547
    },
    {
      "epoch": 0.16037459760023412,
      "grad_norm": 0.06033731997013092,
      "learning_rate": 4.5990635059994146e-05,
      "loss": 0.0009,
      "step": 548
    },
    {
      "epoch": 0.16066725197541704,
      "grad_norm": 0.011225472204387188,
      "learning_rate": 4.5983318700614574e-05,
      "loss": 0.0002,
      "step": 549
    },
    {
      "epoch": 0.16095990635059995,
      "grad_norm": 1.4956049919128418,
      "learning_rate": 4.5976002341235e-05,
      "loss": 0.0051,
      "step": 550
    },
    {
      "epoch": 0.16125256072578284,
      "grad_norm": 0.024007853120565414,
      "learning_rate": 4.596868598185543e-05,
      "loss": 0.0006,
      "step": 551
    },
    {
      "epoch": 0.16154521510096576,
      "grad_norm": 1.055830955505371,
      "learning_rate": 4.596136962247586e-05,
      "loss": 0.0063,
      "step": 552
    },
    {
      "epoch": 0.16183786947614867,
      "grad_norm": 3.8573505878448486,
      "learning_rate": 4.5954053263096286e-05,
      "loss": 0.0249,
      "step": 553
    },
    {
      "epoch": 0.1621305238513316,
      "grad_norm": 0.03098081424832344,
      "learning_rate": 4.5946736903716714e-05,
      "loss": 0.0005,
      "step": 554
    },
    {
      "epoch": 0.16242317822651448,
      "grad_norm": 4.926241397857666,
      "learning_rate": 4.593942054433714e-05,
      "loss": 0.0513,
      "step": 555
    },
    {
      "epoch": 0.1627158326016974,
      "grad_norm": 0.04326876625418663,
      "learning_rate": 4.593210418495757e-05,
      "loss": 0.0006,
      "step": 556
    },
    {
      "epoch": 0.1630084869768803,
      "grad_norm": 21.93604850769043,
      "learning_rate": 4.5924787825578e-05,
      "loss": 0.0702,
      "step": 557
    },
    {
      "epoch": 0.16330114135206322,
      "grad_norm": 4.877942085266113,
      "learning_rate": 4.591747146619842e-05,
      "loss": 0.1671,
      "step": 558
    },
    {
      "epoch": 0.1635937957272461,
      "grad_norm": 0.2925262153148651,
      "learning_rate": 4.591015510681885e-05,
      "loss": 0.0025,
      "step": 559
    },
    {
      "epoch": 0.16388645010242903,
      "grad_norm": 15.076410293579102,
      "learning_rate": 4.5902838747439275e-05,
      "loss": 0.083,
      "step": 560
    },
    {
      "epoch": 0.16417910447761194,
      "grad_norm": 6.79744815826416,
      "learning_rate": 4.58955223880597e-05,
      "loss": 0.0981,
      "step": 561
    },
    {
      "epoch": 0.16447175885279486,
      "grad_norm": 5.972877025604248,
      "learning_rate": 4.588820602868013e-05,
      "loss": 0.0384,
      "step": 562
    },
    {
      "epoch": 0.16476441322797775,
      "grad_norm": 0.05233852192759514,
      "learning_rate": 4.588088966930056e-05,
      "loss": 0.0009,
      "step": 563
    },
    {
      "epoch": 0.16505706760316066,
      "grad_norm": 0.03200345113873482,
      "learning_rate": 4.587357330992099e-05,
      "loss": 0.0008,
      "step": 564
    },
    {
      "epoch": 0.16534972197834358,
      "grad_norm": 0.38592442870140076,
      "learning_rate": 4.5866256950541415e-05,
      "loss": 0.0041,
      "step": 565
    },
    {
      "epoch": 0.1656423763535265,
      "grad_norm": 11.63177490234375,
      "learning_rate": 4.585894059116184e-05,
      "loss": 0.2104,
      "step": 566
    },
    {
      "epoch": 0.16593503072870938,
      "grad_norm": 0.04096129164099693,
      "learning_rate": 4.585162423178227e-05,
      "loss": 0.001,
      "step": 567
    },
    {
      "epoch": 0.1662276851038923,
      "grad_norm": 0.05317602679133415,
      "learning_rate": 4.584430787240269e-05,
      "loss": 0.0011,
      "step": 568
    },
    {
      "epoch": 0.1665203394790752,
      "grad_norm": 0.1012129858136177,
      "learning_rate": 4.583699151302312e-05,
      "loss": 0.0016,
      "step": 569
    },
    {
      "epoch": 0.16681299385425813,
      "grad_norm": 1.065040946006775,
      "learning_rate": 4.582967515364355e-05,
      "loss": 0.0051,
      "step": 570
    },
    {
      "epoch": 0.16710564822944102,
      "grad_norm": 4.996240615844727,
      "learning_rate": 4.5822358794263976e-05,
      "loss": 0.0522,
      "step": 571
    },
    {
      "epoch": 0.16739830260462393,
      "grad_norm": 0.16447268426418304,
      "learning_rate": 4.5815042434884404e-05,
      "loss": 0.0015,
      "step": 572
    },
    {
      "epoch": 0.16769095697980685,
      "grad_norm": 4.073005199432373,
      "learning_rate": 4.580772607550483e-05,
      "loss": 0.0199,
      "step": 573
    },
    {
      "epoch": 0.16798361135498976,
      "grad_norm": 0.011858438141644001,
      "learning_rate": 4.580040971612526e-05,
      "loss": 0.0003,
      "step": 574
    },
    {
      "epoch": 0.16827626573017265,
      "grad_norm": 0.06540852785110474,
      "learning_rate": 4.579309335674569e-05,
      "loss": 0.0008,
      "step": 575
    },
    {
      "epoch": 0.16856892010535557,
      "grad_norm": 0.6887805461883545,
      "learning_rate": 4.5785776997366116e-05,
      "loss": 0.0074,
      "step": 576
    },
    {
      "epoch": 0.16886157448053848,
      "grad_norm": 0.4583264887332916,
      "learning_rate": 4.5778460637986543e-05,
      "loss": 0.0056,
      "step": 577
    },
    {
      "epoch": 0.1691542288557214,
      "grad_norm": 0.017056701704859734,
      "learning_rate": 4.577114427860697e-05,
      "loss": 0.0003,
      "step": 578
    },
    {
      "epoch": 0.1694468832309043,
      "grad_norm": 0.004097268916666508,
      "learning_rate": 4.576382791922739e-05,
      "loss": 0.0001,
      "step": 579
    },
    {
      "epoch": 0.1697395376060872,
      "grad_norm": 0.012352986261248589,
      "learning_rate": 4.575651155984782e-05,
      "loss": 0.0003,
      "step": 580
    },
    {
      "epoch": 0.17003219198127012,
      "grad_norm": 0.0030429689213633537,
      "learning_rate": 4.574919520046825e-05,
      "loss": 0.0001,
      "step": 581
    },
    {
      "epoch": 0.17032484635645304,
      "grad_norm": 0.2785024344921112,
      "learning_rate": 4.5741878841088676e-05,
      "loss": 0.0013,
      "step": 582
    },
    {
      "epoch": 0.17061750073163595,
      "grad_norm": 0.05230037495493889,
      "learning_rate": 4.5734562481709104e-05,
      "loss": 0.0004,
      "step": 583
    },
    {
      "epoch": 0.17091015510681884,
      "grad_norm": 0.00787578895688057,
      "learning_rate": 4.572724612232953e-05,
      "loss": 0.0002,
      "step": 584
    },
    {
      "epoch": 0.17120280948200176,
      "grad_norm": 0.4896010458469391,
      "learning_rate": 4.571992976294996e-05,
      "loss": 0.0026,
      "step": 585
    },
    {
      "epoch": 0.17149546385718467,
      "grad_norm": 0.0032089147716760635,
      "learning_rate": 4.571261340357039e-05,
      "loss": 0.0001,
      "step": 586
    },
    {
      "epoch": 0.1717881182323676,
      "grad_norm": 0.011004786007106304,
      "learning_rate": 4.5705297044190816e-05,
      "loss": 0.0002,
      "step": 587
    },
    {
      "epoch": 0.17208077260755048,
      "grad_norm": 0.0479825995862484,
      "learning_rate": 4.5697980684811244e-05,
      "loss": 0.0005,
      "step": 588
    },
    {
      "epoch": 0.1723734269827334,
      "grad_norm": 0.0029730063397437334,
      "learning_rate": 4.5690664325431665e-05,
      "loss": 0.0001,
      "step": 589
    },
    {
      "epoch": 0.1726660813579163,
      "grad_norm": 0.015313234180212021,
      "learning_rate": 4.568334796605209e-05,
      "loss": 0.0003,
      "step": 590
    },
    {
      "epoch": 0.17295873573309922,
      "grad_norm": 0.017034409567713737,
      "learning_rate": 4.567603160667252e-05,
      "loss": 0.0002,
      "step": 591
    },
    {
      "epoch": 0.1732513901082821,
      "grad_norm": 0.4127567708492279,
      "learning_rate": 4.566871524729295e-05,
      "loss": 0.0016,
      "step": 592
    },
    {
      "epoch": 0.17354404448346503,
      "grad_norm": 0.015415014699101448,
      "learning_rate": 4.566139888791338e-05,
      "loss": 0.0002,
      "step": 593
    },
    {
      "epoch": 0.17383669885864794,
      "grad_norm": 0.0043084463104605675,
      "learning_rate": 4.5654082528533805e-05,
      "loss": 0.0001,
      "step": 594
    },
    {
      "epoch": 0.17412935323383086,
      "grad_norm": 0.0045915767550468445,
      "learning_rate": 4.564676616915423e-05,
      "loss": 0.0001,
      "step": 595
    },
    {
      "epoch": 0.17442200760901375,
      "grad_norm": 0.014552316628396511,
      "learning_rate": 4.563944980977466e-05,
      "loss": 0.0002,
      "step": 596
    },
    {
      "epoch": 0.17471466198419666,
      "grad_norm": 0.015970690175890923,
      "learning_rate": 4.563213345039509e-05,
      "loss": 0.0002,
      "step": 597
    },
    {
      "epoch": 0.17500731635937958,
      "grad_norm": 0.0026189619675278664,
      "learning_rate": 4.562481709101552e-05,
      "loss": 0.0001,
      "step": 598
    },
    {
      "epoch": 0.1752999707345625,
      "grad_norm": 0.0028786526527255774,
      "learning_rate": 4.5617500731635945e-05,
      "loss": 0.0001,
      "step": 599
    },
    {
      "epoch": 0.17559262510974538,
      "grad_norm": 0.015483339317142963,
      "learning_rate": 4.5610184372256366e-05,
      "loss": 0.0002,
      "step": 600
    },
    {
      "epoch": 0.1758852794849283,
      "grad_norm": 0.05966808274388313,
      "learning_rate": 4.5602868012876794e-05,
      "loss": 0.0003,
      "step": 601
    },
    {
      "epoch": 0.1761779338601112,
      "grad_norm": 0.006986284162849188,
      "learning_rate": 4.559555165349722e-05,
      "loss": 0.0001,
      "step": 602
    },
    {
      "epoch": 0.17647058823529413,
      "grad_norm": 0.0017342488281428814,
      "learning_rate": 4.558823529411765e-05,
      "loss": 0.0,
      "step": 603
    },
    {
      "epoch": 0.17676324261047702,
      "grad_norm": 0.1534646898508072,
      "learning_rate": 4.558091893473808e-05,
      "loss": 0.0007,
      "step": 604
    },
    {
      "epoch": 0.17705589698565993,
      "grad_norm": 0.025370581075549126,
      "learning_rate": 4.5573602575358506e-05,
      "loss": 0.0001,
      "step": 605
    },
    {
      "epoch": 0.17734855136084285,
      "grad_norm": 0.0031309896148741245,
      "learning_rate": 4.5566286215978934e-05,
      "loss": 0.0001,
      "step": 606
    },
    {
      "epoch": 0.17764120573602576,
      "grad_norm": 0.003534929594025016,
      "learning_rate": 4.555896985659936e-05,
      "loss": 0.0001,
      "step": 607
    },
    {
      "epoch": 0.17793386011120865,
      "grad_norm": 1.1894418001174927,
      "learning_rate": 4.555165349721979e-05,
      "loss": 0.0026,
      "step": 608
    },
    {
      "epoch": 0.17822651448639157,
      "grad_norm": 0.0013738750712946057,
      "learning_rate": 4.554433713784022e-05,
      "loss": 0.0,
      "step": 609
    },
    {
      "epoch": 0.17851916886157448,
      "grad_norm": 3.998434066772461,
      "learning_rate": 4.5537020778460645e-05,
      "loss": 0.2078,
      "step": 610
    },
    {
      "epoch": 0.1788118232367574,
      "grad_norm": 0.041833844035863876,
      "learning_rate": 4.552970441908107e-05,
      "loss": 0.0002,
      "step": 611
    },
    {
      "epoch": 0.1791044776119403,
      "grad_norm": 0.0021251097787171602,
      "learning_rate": 4.5522388059701495e-05,
      "loss": 0.0,
      "step": 612
    },
    {
      "epoch": 0.1793971319871232,
      "grad_norm": 4.649111270904541,
      "learning_rate": 4.551507170032192e-05,
      "loss": 0.1573,
      "step": 613
    },
    {
      "epoch": 0.17968978636230612,
      "grad_norm": 0.46552494168281555,
      "learning_rate": 4.550775534094235e-05,
      "loss": 0.0023,
      "step": 614
    },
    {
      "epoch": 0.17998244073748904,
      "grad_norm": 0.030416730791330338,
      "learning_rate": 4.550043898156278e-05,
      "loss": 0.0004,
      "step": 615
    },
    {
      "epoch": 0.18027509511267192,
      "grad_norm": 4.120852947235107,
      "learning_rate": 4.5493122622183206e-05,
      "loss": 0.1734,
      "step": 616
    },
    {
      "epoch": 0.18056774948785484,
      "grad_norm": 0.8814420104026794,
      "learning_rate": 4.5485806262803634e-05,
      "loss": 0.0056,
      "step": 617
    },
    {
      "epoch": 0.18086040386303776,
      "grad_norm": 0.9062165021896362,
      "learning_rate": 4.547848990342406e-05,
      "loss": 0.0061,
      "step": 618
    },
    {
      "epoch": 0.18115305823822067,
      "grad_norm": 0.49208158254623413,
      "learning_rate": 4.547117354404449e-05,
      "loss": 0.0048,
      "step": 619
    },
    {
      "epoch": 0.18144571261340356,
      "grad_norm": 0.15719246864318848,
      "learning_rate": 4.546385718466492e-05,
      "loss": 0.0019,
      "step": 620
    },
    {
      "epoch": 0.18173836698858647,
      "grad_norm": 0.005446034017950296,
      "learning_rate": 4.545654082528534e-05,
      "loss": 0.0001,
      "step": 621
    },
    {
      "epoch": 0.1820310213637694,
      "grad_norm": 0.009686796925961971,
      "learning_rate": 4.544922446590577e-05,
      "loss": 0.0002,
      "step": 622
    },
    {
      "epoch": 0.1823236757389523,
      "grad_norm": 0.31266066431999207,
      "learning_rate": 4.5441908106526195e-05,
      "loss": 0.0009,
      "step": 623
    },
    {
      "epoch": 0.1826163301141352,
      "grad_norm": 0.46191877126693726,
      "learning_rate": 4.543459174714662e-05,
      "loss": 0.0016,
      "step": 624
    },
    {
      "epoch": 0.1829089844893181,
      "grad_norm": 0.0038166295271366835,
      "learning_rate": 4.542727538776705e-05,
      "loss": 0.0001,
      "step": 625
    },
    {
      "epoch": 0.18320163886450103,
      "grad_norm": 0.07279108464717865,
      "learning_rate": 4.541995902838748e-05,
      "loss": 0.0003,
      "step": 626
    },
    {
      "epoch": 0.18349429323968394,
      "grad_norm": 8.50744342803955,
      "learning_rate": 4.541264266900791e-05,
      "loss": 0.1231,
      "step": 627
    },
    {
      "epoch": 0.18378694761486683,
      "grad_norm": 9.774702072143555,
      "learning_rate": 4.5405326309628335e-05,
      "loss": 0.3219,
      "step": 628
    },
    {
      "epoch": 0.18407960199004975,
      "grad_norm": 0.024404382333159447,
      "learning_rate": 4.539800995024876e-05,
      "loss": 0.0002,
      "step": 629
    },
    {
      "epoch": 0.18437225636523266,
      "grad_norm": 0.0032634008675813675,
      "learning_rate": 4.539069359086919e-05,
      "loss": 0.0001,
      "step": 630
    },
    {
      "epoch": 0.18466491074041558,
      "grad_norm": 0.003782094456255436,
      "learning_rate": 4.538337723148962e-05,
      "loss": 0.0001,
      "step": 631
    },
    {
      "epoch": 0.18495756511559847,
      "grad_norm": 0.004177380353212357,
      "learning_rate": 4.537606087211004e-05,
      "loss": 0.0001,
      "step": 632
    },
    {
      "epoch": 0.18525021949078138,
      "grad_norm": 8.427203178405762,
      "learning_rate": 4.536874451273047e-05,
      "loss": 0.0314,
      "step": 633
    },
    {
      "epoch": 0.1855428738659643,
      "grad_norm": 0.07821492105722427,
      "learning_rate": 4.5361428153350896e-05,
      "loss": 0.0007,
      "step": 634
    },
    {
      "epoch": 0.1858355282411472,
      "grad_norm": 0.7422145009040833,
      "learning_rate": 4.5354111793971324e-05,
      "loss": 0.0033,
      "step": 635
    },
    {
      "epoch": 0.1861281826163301,
      "grad_norm": 0.04130464419722557,
      "learning_rate": 4.534679543459175e-05,
      "loss": 0.0007,
      "step": 636
    },
    {
      "epoch": 0.18642083699151302,
      "grad_norm": 0.12119077146053314,
      "learning_rate": 4.533947907521218e-05,
      "loss": 0.0008,
      "step": 637
    },
    {
      "epoch": 0.18671349136669593,
      "grad_norm": 0.020508840680122375,
      "learning_rate": 4.533216271583261e-05,
      "loss": 0.0003,
      "step": 638
    },
    {
      "epoch": 0.18700614574187885,
      "grad_norm": 11.085396766662598,
      "learning_rate": 4.5324846356453036e-05,
      "loss": 0.1443,
      "step": 639
    },
    {
      "epoch": 0.18729880011706174,
      "grad_norm": 0.008764538913965225,
      "learning_rate": 4.531752999707346e-05,
      "loss": 0.0001,
      "step": 640
    },
    {
      "epoch": 0.18759145449224465,
      "grad_norm": 6.063767433166504,
      "learning_rate": 4.5310213637693885e-05,
      "loss": 0.0167,
      "step": 641
    },
    {
      "epoch": 0.18788410886742757,
      "grad_norm": 0.004624322522431612,
      "learning_rate": 4.530289727831431e-05,
      "loss": 0.0001,
      "step": 642
    },
    {
      "epoch": 0.18817676324261048,
      "grad_norm": 5.405406951904297,
      "learning_rate": 4.529558091893474e-05,
      "loss": 0.2235,
      "step": 643
    },
    {
      "epoch": 0.18846941761779337,
      "grad_norm": 2.0690550804138184,
      "learning_rate": 4.528826455955517e-05,
      "loss": 0.193,
      "step": 644
    },
    {
      "epoch": 0.1887620719929763,
      "grad_norm": 7.38582706451416,
      "learning_rate": 4.52809482001756e-05,
      "loss": 0.1021,
      "step": 645
    },
    {
      "epoch": 0.1890547263681592,
      "grad_norm": 6.473080158233643,
      "learning_rate": 4.5273631840796025e-05,
      "loss": 0.1266,
      "step": 646
    },
    {
      "epoch": 0.18934738074334212,
      "grad_norm": 1.8652619123458862,
      "learning_rate": 4.526631548141645e-05,
      "loss": 0.0047,
      "step": 647
    },
    {
      "epoch": 0.18964003511852504,
      "grad_norm": 4.676137447357178,
      "learning_rate": 4.5258999122036874e-05,
      "loss": 0.2113,
      "step": 648
    },
    {
      "epoch": 0.18993268949370792,
      "grad_norm": 0.1120869368314743,
      "learning_rate": 4.52516827626573e-05,
      "loss": 0.0022,
      "step": 649
    },
    {
      "epoch": 0.19022534386889084,
      "grad_norm": 0.43787863850593567,
      "learning_rate": 4.524436640327773e-05,
      "loss": 0.008,
      "step": 650
    },
    {
      "epoch": 0.19051799824407376,
      "grad_norm": 1.8265527486801147,
      "learning_rate": 4.523705004389816e-05,
      "loss": 0.0165,
      "step": 651
    },
    {
      "epoch": 0.19081065261925667,
      "grad_norm": 14.509317398071289,
      "learning_rate": 4.5229733684518586e-05,
      "loss": 0.2503,
      "step": 652
    },
    {
      "epoch": 0.19110330699443956,
      "grad_norm": 14.345330238342285,
      "learning_rate": 4.5222417325139013e-05,
      "loss": 0.1041,
      "step": 653
    },
    {
      "epoch": 0.19139596136962247,
      "grad_norm": 8.671512603759766,
      "learning_rate": 4.521510096575944e-05,
      "loss": 0.2108,
      "step": 654
    },
    {
      "epoch": 0.1916886157448054,
      "grad_norm": 10.393280029296875,
      "learning_rate": 4.520778460637987e-05,
      "loss": 0.179,
      "step": 655
    },
    {
      "epoch": 0.1919812701199883,
      "grad_norm": 3.7070400714874268,
      "learning_rate": 4.520046824700029e-05,
      "loss": 0.0498,
      "step": 656
    },
    {
      "epoch": 0.1922739244951712,
      "grad_norm": 0.06468917429447174,
      "learning_rate": 4.519315188762072e-05,
      "loss": 0.0014,
      "step": 657
    },
    {
      "epoch": 0.1925665788703541,
      "grad_norm": 0.12241574376821518,
      "learning_rate": 4.5185835528241146e-05,
      "loss": 0.0027,
      "step": 658
    },
    {
      "epoch": 0.19285923324553703,
      "grad_norm": 0.07482358813285828,
      "learning_rate": 4.5178519168861574e-05,
      "loss": 0.0016,
      "step": 659
    },
    {
      "epoch": 0.19315188762071994,
      "grad_norm": 4.144123554229736,
      "learning_rate": 4.5171202809482e-05,
      "loss": 0.1355,
      "step": 660
    },
    {
      "epoch": 0.19344454199590283,
      "grad_norm": 0.0915708839893341,
      "learning_rate": 4.516388645010243e-05,
      "loss": 0.0021,
      "step": 661
    },
    {
      "epoch": 0.19373719637108575,
      "grad_norm": 0.09312600642442703,
      "learning_rate": 4.515657009072286e-05,
      "loss": 0.002,
      "step": 662
    },
    {
      "epoch": 0.19402985074626866,
      "grad_norm": 3.0293922424316406,
      "learning_rate": 4.5149253731343286e-05,
      "loss": 0.1203,
      "step": 663
    },
    {
      "epoch": 0.19432250512145158,
      "grad_norm": 0.09050063043832779,
      "learning_rate": 4.514193737196371e-05,
      "loss": 0.0022,
      "step": 664
    },
    {
      "epoch": 0.19461515949663447,
      "grad_norm": 0.5784649848937988,
      "learning_rate": 4.5134621012584135e-05,
      "loss": 0.006,
      "step": 665
    },
    {
      "epoch": 0.19490781387181738,
      "grad_norm": 3.1412320137023926,
      "learning_rate": 4.512730465320456e-05,
      "loss": 0.0228,
      "step": 666
    },
    {
      "epoch": 0.1952004682470003,
      "grad_norm": 5.518512725830078,
      "learning_rate": 4.511998829382499e-05,
      "loss": 0.061,
      "step": 667
    },
    {
      "epoch": 0.1954931226221832,
      "grad_norm": 0.11754651367664337,
      "learning_rate": 4.511267193444542e-05,
      "loss": 0.0025,
      "step": 668
    },
    {
      "epoch": 0.1957857769973661,
      "grad_norm": 0.18826378881931305,
      "learning_rate": 4.510535557506585e-05,
      "loss": 0.0033,
      "step": 669
    },
    {
      "epoch": 0.19607843137254902,
      "grad_norm": 0.12070680409669876,
      "learning_rate": 4.5098039215686275e-05,
      "loss": 0.0022,
      "step": 670
    },
    {
      "epoch": 0.19637108574773193,
      "grad_norm": 0.133676216006279,
      "learning_rate": 4.50907228563067e-05,
      "loss": 0.0022,
      "step": 671
    },
    {
      "epoch": 0.19666374012291485,
      "grad_norm": 0.669661283493042,
      "learning_rate": 4.508340649692713e-05,
      "loss": 0.0086,
      "step": 672
    },
    {
      "epoch": 0.19695639449809774,
      "grad_norm": 0.17344354093074799,
      "learning_rate": 4.507609013754756e-05,
      "loss": 0.0023,
      "step": 673
    },
    {
      "epoch": 0.19724904887328065,
      "grad_norm": 0.12338634580373764,
      "learning_rate": 4.506877377816798e-05,
      "loss": 0.0021,
      "step": 674
    },
    {
      "epoch": 0.19754170324846357,
      "grad_norm": 0.06881749629974365,
      "learning_rate": 4.506145741878841e-05,
      "loss": 0.001,
      "step": 675
    },
    {
      "epoch": 0.19783435762364648,
      "grad_norm": 0.038889095187187195,
      "learning_rate": 4.5054141059408836e-05,
      "loss": 0.0008,
      "step": 676
    },
    {
      "epoch": 0.19812701199882937,
      "grad_norm": 0.014144466258585453,
      "learning_rate": 4.5046824700029264e-05,
      "loss": 0.0003,
      "step": 677
    },
    {
      "epoch": 0.1984196663740123,
      "grad_norm": 0.16300074756145477,
      "learning_rate": 4.503950834064969e-05,
      "loss": 0.001,
      "step": 678
    },
    {
      "epoch": 0.1987123207491952,
      "grad_norm": 4.094395160675049,
      "learning_rate": 4.503219198127012e-05,
      "loss": 0.0307,
      "step": 679
    },
    {
      "epoch": 0.19900497512437812,
      "grad_norm": 0.00436925096437335,
      "learning_rate": 4.502487562189055e-05,
      "loss": 0.0001,
      "step": 680
    },
    {
      "epoch": 0.199297629499561,
      "grad_norm": 0.013027762062847614,
      "learning_rate": 4.5017559262510976e-05,
      "loss": 0.0002,
      "step": 681
    },
    {
      "epoch": 0.19959028387474392,
      "grad_norm": 0.031487055122852325,
      "learning_rate": 4.5010242903131404e-05,
      "loss": 0.0004,
      "step": 682
    },
    {
      "epoch": 0.19988293824992684,
      "grad_norm": 6.214965343475342,
      "learning_rate": 4.500292654375183e-05,
      "loss": 0.1875,
      "step": 683
    },
    {
      "epoch": 0.20017559262510976,
      "grad_norm": 0.6754149198532104,
      "learning_rate": 4.499561018437226e-05,
      "loss": 0.004,
      "step": 684
    },
    {
      "epoch": 0.20046824700029264,
      "grad_norm": 0.68619704246521,
      "learning_rate": 4.498829382499268e-05,
      "loss": 0.0044,
      "step": 685
    },
    {
      "epoch": 0.20076090137547556,
      "grad_norm": 0.06463277339935303,
      "learning_rate": 4.498097746561311e-05,
      "loss": 0.0007,
      "step": 686
    },
    {
      "epoch": 0.20105355575065847,
      "grad_norm": 0.10290379077196121,
      "learning_rate": 4.497366110623354e-05,
      "loss": 0.0009,
      "step": 687
    },
    {
      "epoch": 0.2013462101258414,
      "grad_norm": 9.652802467346191,
      "learning_rate": 4.4966344746853965e-05,
      "loss": 0.0612,
      "step": 688
    },
    {
      "epoch": 0.20163886450102428,
      "grad_norm": 0.0042547243647277355,
      "learning_rate": 4.495902838747439e-05,
      "loss": 0.0001,
      "step": 689
    },
    {
      "epoch": 0.2019315188762072,
      "grad_norm": 0.004024684429168701,
      "learning_rate": 4.495171202809482e-05,
      "loss": 0.0001,
      "step": 690
    },
    {
      "epoch": 0.2022241732513901,
      "grad_norm": 0.2425258457660675,
      "learning_rate": 4.494439566871525e-05,
      "loss": 0.0008,
      "step": 691
    },
    {
      "epoch": 0.20251682762657303,
      "grad_norm": 0.2894352376461029,
      "learning_rate": 4.4937079309335676e-05,
      "loss": 0.0011,
      "step": 692
    },
    {
      "epoch": 0.20280948200175591,
      "grad_norm": 14.928994178771973,
      "learning_rate": 4.4929762949956104e-05,
      "loss": 0.3129,
      "step": 693
    },
    {
      "epoch": 0.20310213637693883,
      "grad_norm": 7.792734622955322,
      "learning_rate": 4.492244659057653e-05,
      "loss": 0.1719,
      "step": 694
    },
    {
      "epoch": 0.20339479075212175,
      "grad_norm": 0.8874497413635254,
      "learning_rate": 4.491513023119696e-05,
      "loss": 0.0033,
      "step": 695
    },
    {
      "epoch": 0.20368744512730466,
      "grad_norm": 0.013569723814725876,
      "learning_rate": 4.490781387181738e-05,
      "loss": 0.0002,
      "step": 696
    },
    {
      "epoch": 0.20398009950248755,
      "grad_norm": 0.1591472625732422,
      "learning_rate": 4.490049751243781e-05,
      "loss": 0.0006,
      "step": 697
    },
    {
      "epoch": 0.20427275387767047,
      "grad_norm": 6.823668479919434,
      "learning_rate": 4.489318115305824e-05,
      "loss": 0.1749,
      "step": 698
    },
    {
      "epoch": 0.20456540825285338,
      "grad_norm": 0.015314355492591858,
      "learning_rate": 4.4885864793678665e-05,
      "loss": 0.0002,
      "step": 699
    },
    {
      "epoch": 0.2048580626280363,
      "grad_norm": 0.023964963853359222,
      "learning_rate": 4.487854843429909e-05,
      "loss": 0.0002,
      "step": 700
    },
    {
      "epoch": 0.20515071700321919,
      "grad_norm": 0.0187582578510046,
      "learning_rate": 4.487123207491952e-05,
      "loss": 0.0002,
      "step": 701
    },
    {
      "epoch": 0.2054433713784021,
      "grad_norm": 0.02765386924147606,
      "learning_rate": 4.486391571553995e-05,
      "loss": 0.0002,
      "step": 702
    },
    {
      "epoch": 0.20573602575358502,
      "grad_norm": 18.86209487915039,
      "learning_rate": 4.485659935616038e-05,
      "loss": 0.4532,
      "step": 703
    },
    {
      "epoch": 0.20602868012876793,
      "grad_norm": 8.715555191040039,
      "learning_rate": 4.4849282996780805e-05,
      "loss": 0.0359,
      "step": 704
    },
    {
      "epoch": 0.20632133450395082,
      "grad_norm": 0.07234326750040054,
      "learning_rate": 4.484196663740123e-05,
      "loss": 0.0006,
      "step": 705
    },
    {
      "epoch": 0.20661398887913374,
      "grad_norm": 0.045028142631053925,
      "learning_rate": 4.4834650278021654e-05,
      "loss": 0.0004,
      "step": 706
    },
    {
      "epoch": 0.20690664325431665,
      "grad_norm": 0.016763372346758842,
      "learning_rate": 4.482733391864208e-05,
      "loss": 0.0002,
      "step": 707
    },
    {
      "epoch": 0.20719929762949957,
      "grad_norm": 7.2011284828186035,
      "learning_rate": 4.482001755926251e-05,
      "loss": 0.1084,
      "step": 708
    },
    {
      "epoch": 0.20749195200468246,
      "grad_norm": 0.03069467470049858,
      "learning_rate": 4.481270119988294e-05,
      "loss": 0.0004,
      "step": 709
    },
    {
      "epoch": 0.20778460637986537,
      "grad_norm": 0.028901129961013794,
      "learning_rate": 4.4805384840503366e-05,
      "loss": 0.0004,
      "step": 710
    },
    {
      "epoch": 0.2080772607550483,
      "grad_norm": 0.00795326754450798,
      "learning_rate": 4.4798068481123794e-05,
      "loss": 0.0001,
      "step": 711
    },
    {
      "epoch": 0.2083699151302312,
      "grad_norm": 1.5992717742919922,
      "learning_rate": 4.479075212174422e-05,
      "loss": 0.0046,
      "step": 712
    },
    {
      "epoch": 0.20866256950541412,
      "grad_norm": 0.018271101638674736,
      "learning_rate": 4.478343576236465e-05,
      "loss": 0.0003,
      "step": 713
    },
    {
      "epoch": 0.208955223880597,
      "grad_norm": 0.28536123037338257,
      "learning_rate": 4.477611940298508e-05,
      "loss": 0.0015,
      "step": 714
    },
    {
      "epoch": 0.20924787825577992,
      "grad_norm": 0.010522237978875637,
      "learning_rate": 4.4768803043605506e-05,
      "loss": 0.0002,
      "step": 715
    },
    {
      "epoch": 0.20954053263096284,
      "grad_norm": 0.053751010447740555,
      "learning_rate": 4.4761486684225934e-05,
      "loss": 0.0006,
      "step": 716
    },
    {
      "epoch": 0.20983318700614575,
      "grad_norm": 0.0036731716245412827,
      "learning_rate": 4.4754170324846355e-05,
      "loss": 0.0001,
      "step": 717
    },
    {
      "epoch": 0.21012584138132864,
      "grad_norm": 0.004985527601093054,
      "learning_rate": 4.474685396546678e-05,
      "loss": 0.0001,
      "step": 718
    },
    {
      "epoch": 0.21041849575651156,
      "grad_norm": 0.004694411531090736,
      "learning_rate": 4.473953760608721e-05,
      "loss": 0.0001,
      "step": 719
    },
    {
      "epoch": 0.21071115013169447,
      "grad_norm": 0.0014005025150254369,
      "learning_rate": 4.473222124670764e-05,
      "loss": 0.0,
      "step": 720
    },
    {
      "epoch": 0.2110038045068774,
      "grad_norm": 1.1884040832519531,
      "learning_rate": 4.472490488732807e-05,
      "loss": 0.0041,
      "step": 721
    },
    {
      "epoch": 0.21129645888206028,
      "grad_norm": 0.0041051763109862804,
      "learning_rate": 4.4717588527948495e-05,
      "loss": 0.0001,
      "step": 722
    },
    {
      "epoch": 0.2115891132572432,
      "grad_norm": 0.001686644391156733,
      "learning_rate": 4.471027216856892e-05,
      "loss": 0.0,
      "step": 723
    },
    {
      "epoch": 0.2118817676324261,
      "grad_norm": 0.020543327555060387,
      "learning_rate": 4.470295580918935e-05,
      "loss": 0.0002,
      "step": 724
    },
    {
      "epoch": 0.21217442200760903,
      "grad_norm": 4.9859232902526855,
      "learning_rate": 4.469563944980978e-05,
      "loss": 0.0254,
      "step": 725
    },
    {
      "epoch": 0.21246707638279191,
      "grad_norm": 5.037113189697266,
      "learning_rate": 4.4688323090430206e-05,
      "loss": 0.1981,
      "step": 726
    },
    {
      "epoch": 0.21275973075797483,
      "grad_norm": 0.017563283443450928,
      "learning_rate": 4.468100673105063e-05,
      "loss": 0.0002,
      "step": 727
    },
    {
      "epoch": 0.21305238513315775,
      "grad_norm": 0.07505333423614502,
      "learning_rate": 4.4673690371671056e-05,
      "loss": 0.0006,
      "step": 728
    },
    {
      "epoch": 0.21334503950834066,
      "grad_norm": 0.003406686242669821,
      "learning_rate": 4.4666374012291483e-05,
      "loss": 0.0001,
      "step": 729
    },
    {
      "epoch": 0.21363769388352355,
      "grad_norm": 0.034339457750320435,
      "learning_rate": 4.465905765291191e-05,
      "loss": 0.0003,
      "step": 730
    },
    {
      "epoch": 0.21393034825870647,
      "grad_norm": 0.014503705315291882,
      "learning_rate": 4.465174129353234e-05,
      "loss": 0.0002,
      "step": 731
    },
    {
      "epoch": 0.21422300263388938,
      "grad_norm": 0.0040726629085838795,
      "learning_rate": 4.464442493415277e-05,
      "loss": 0.0001,
      "step": 732
    },
    {
      "epoch": 0.2145156570090723,
      "grad_norm": 0.0007581388927064836,
      "learning_rate": 4.4637108574773195e-05,
      "loss": 0.0,
      "step": 733
    },
    {
      "epoch": 0.21480831138425519,
      "grad_norm": 8.548843383789062,
      "learning_rate": 4.462979221539362e-05,
      "loss": 0.0632,
      "step": 734
    },
    {
      "epoch": 0.2151009657594381,
      "grad_norm": 1.2448025941848755,
      "learning_rate": 4.462247585601405e-05,
      "loss": 0.0068,
      "step": 735
    },
    {
      "epoch": 0.21539362013462102,
      "grad_norm": 0.013233359903097153,
      "learning_rate": 4.461515949663448e-05,
      "loss": 0.0002,
      "step": 736
    },
    {
      "epoch": 0.21568627450980393,
      "grad_norm": 3.531691312789917,
      "learning_rate": 4.460784313725491e-05,
      "loss": 0.2103,
      "step": 737
    },
    {
      "epoch": 0.21597892888498682,
      "grad_norm": 0.05020001903176308,
      "learning_rate": 4.460052677787533e-05,
      "loss": 0.0003,
      "step": 738
    },
    {
      "epoch": 0.21627158326016974,
      "grad_norm": 1.1592589616775513,
      "learning_rate": 4.4593210418495756e-05,
      "loss": 0.0047,
      "step": 739
    },
    {
      "epoch": 0.21656423763535265,
      "grad_norm": 12.470833778381348,
      "learning_rate": 4.4585894059116184e-05,
      "loss": 0.1691,
      "step": 740
    },
    {
      "epoch": 0.21685689201053557,
      "grad_norm": 0.032759133726358414,
      "learning_rate": 4.457857769973661e-05,
      "loss": 0.0004,
      "step": 741
    },
    {
      "epoch": 0.21714954638571846,
      "grad_norm": 0.06250883638858795,
      "learning_rate": 4.457126134035704e-05,
      "loss": 0.0005,
      "step": 742
    },
    {
      "epoch": 0.21744220076090137,
      "grad_norm": 0.12659525871276855,
      "learning_rate": 4.456394498097747e-05,
      "loss": 0.0013,
      "step": 743
    },
    {
      "epoch": 0.2177348551360843,
      "grad_norm": 0.06023351475596428,
      "learning_rate": 4.4556628621597896e-05,
      "loss": 0.0005,
      "step": 744
    },
    {
      "epoch": 0.2180275095112672,
      "grad_norm": 0.01682423986494541,
      "learning_rate": 4.4549312262218324e-05,
      "loss": 0.0003,
      "step": 745
    },
    {
      "epoch": 0.2183201638864501,
      "grad_norm": 0.016918722540140152,
      "learning_rate": 4.454199590283875e-05,
      "loss": 0.0004,
      "step": 746
    },
    {
      "epoch": 0.218612818261633,
      "grad_norm": 0.5990718007087708,
      "learning_rate": 4.453467954345918e-05,
      "loss": 0.004,
      "step": 747
    },
    {
      "epoch": 0.21890547263681592,
      "grad_norm": 3.5272769927978516,
      "learning_rate": 4.452736318407961e-05,
      "loss": 0.1258,
      "step": 748
    },
    {
      "epoch": 0.21919812701199884,
      "grad_norm": 0.029189743101596832,
      "learning_rate": 4.452004682470003e-05,
      "loss": 0.0005,
      "step": 749
    },
    {
      "epoch": 0.21949078138718173,
      "grad_norm": 0.09604718536138535,
      "learning_rate": 4.451273046532046e-05,
      "loss": 0.0009,
      "step": 750
    },
    {
      "epoch": 0.21978343576236464,
      "grad_norm": 0.01348460279405117,
      "learning_rate": 4.4505414105940885e-05,
      "loss": 0.0004,
      "step": 751
    },
    {
      "epoch": 0.22007609013754756,
      "grad_norm": 0.020246319472789764,
      "learning_rate": 4.449809774656131e-05,
      "loss": 0.0005,
      "step": 752
    },
    {
      "epoch": 0.22036874451273047,
      "grad_norm": 0.014105631969869137,
      "learning_rate": 4.449078138718174e-05,
      "loss": 0.0003,
      "step": 753
    },
    {
      "epoch": 0.22066139888791336,
      "grad_norm": 0.07877824455499649,
      "learning_rate": 4.448346502780217e-05,
      "loss": 0.001,
      "step": 754
    },
    {
      "epoch": 0.22095405326309628,
      "grad_norm": 1.8267985582351685,
      "learning_rate": 4.44761486684226e-05,
      "loss": 0.0129,
      "step": 755
    },
    {
      "epoch": 0.2212467076382792,
      "grad_norm": 0.6814830899238586,
      "learning_rate": 4.4468832309043025e-05,
      "loss": 0.0047,
      "step": 756
    },
    {
      "epoch": 0.2215393620134621,
      "grad_norm": 0.25179216265678406,
      "learning_rate": 4.446151594966345e-05,
      "loss": 0.0023,
      "step": 757
    },
    {
      "epoch": 0.221832016388645,
      "grad_norm": 0.9964368343353271,
      "learning_rate": 4.445419959028388e-05,
      "loss": 0.0064,
      "step": 758
    },
    {
      "epoch": 0.2221246707638279,
      "grad_norm": 0.05321267619729042,
      "learning_rate": 4.44468832309043e-05,
      "loss": 0.0008,
      "step": 759
    },
    {
      "epoch": 0.22241732513901083,
      "grad_norm": 2.7881665229797363,
      "learning_rate": 4.443956687152473e-05,
      "loss": 0.2114,
      "step": 760
    },
    {
      "epoch": 0.22270997951419375,
      "grad_norm": 0.029525209218263626,
      "learning_rate": 4.443225051214516e-05,
      "loss": 0.0005,
      "step": 761
    },
    {
      "epoch": 0.22300263388937663,
      "grad_norm": 0.019108805805444717,
      "learning_rate": 4.4424934152765586e-05,
      "loss": 0.0004,
      "step": 762
    },
    {
      "epoch": 0.22329528826455955,
      "grad_norm": 0.5585194230079651,
      "learning_rate": 4.4417617793386013e-05,
      "loss": 0.0035,
      "step": 763
    },
    {
      "epoch": 0.22358794263974247,
      "grad_norm": 0.012019911780953407,
      "learning_rate": 4.441030143400644e-05,
      "loss": 0.0002,
      "step": 764
    },
    {
      "epoch": 0.22388059701492538,
      "grad_norm": 0.020603956654667854,
      "learning_rate": 4.440298507462687e-05,
      "loss": 0.0006,
      "step": 765
    },
    {
      "epoch": 0.22417325139010827,
      "grad_norm": 5.0426836013793945,
      "learning_rate": 4.43956687152473e-05,
      "loss": 0.023,
      "step": 766
    },
    {
      "epoch": 0.22446590576529118,
      "grad_norm": 2.308359384536743,
      "learning_rate": 4.4388352355867725e-05,
      "loss": 0.2389,
      "step": 767
    },
    {
      "epoch": 0.2247585601404741,
      "grad_norm": 4.962514400482178,
      "learning_rate": 4.438103599648815e-05,
      "loss": 0.0525,
      "step": 768
    },
    {
      "epoch": 0.22505121451565702,
      "grad_norm": 0.06218094751238823,
      "learning_rate": 4.437371963710858e-05,
      "loss": 0.0008,
      "step": 769
    },
    {
      "epoch": 0.2253438688908399,
      "grad_norm": 0.8410007953643799,
      "learning_rate": 4.4366403277729e-05,
      "loss": 0.0056,
      "step": 770
    },
    {
      "epoch": 0.22563652326602282,
      "grad_norm": 3.1172614097595215,
      "learning_rate": 4.435908691834943e-05,
      "loss": 0.0257,
      "step": 771
    },
    {
      "epoch": 0.22592917764120574,
      "grad_norm": 0.033022698014974594,
      "learning_rate": 4.435177055896986e-05,
      "loss": 0.0006,
      "step": 772
    },
    {
      "epoch": 0.22622183201638865,
      "grad_norm": 0.10443870723247528,
      "learning_rate": 4.4344454199590286e-05,
      "loss": 0.0015,
      "step": 773
    },
    {
      "epoch": 0.22651448639157157,
      "grad_norm": 0.13600991666316986,
      "learning_rate": 4.4337137840210714e-05,
      "loss": 0.0017,
      "step": 774
    },
    {
      "epoch": 0.22680714076675446,
      "grad_norm": 3.21096134185791,
      "learning_rate": 4.432982148083114e-05,
      "loss": 0.0155,
      "step": 775
    },
    {
      "epoch": 0.22709979514193737,
      "grad_norm": 2.4363033771514893,
      "learning_rate": 4.432250512145157e-05,
      "loss": 0.0151,
      "step": 776
    },
    {
      "epoch": 0.2273924495171203,
      "grad_norm": 0.03876936063170433,
      "learning_rate": 4.4315188762072e-05,
      "loss": 0.0011,
      "step": 777
    },
    {
      "epoch": 0.2276851038923032,
      "grad_norm": 0.023100847378373146,
      "learning_rate": 4.4307872402692426e-05,
      "loss": 0.0007,
      "step": 778
    },
    {
      "epoch": 0.2279777582674861,
      "grad_norm": 0.018950048834085464,
      "learning_rate": 4.4300556043312854e-05,
      "loss": 0.0004,
      "step": 779
    },
    {
      "epoch": 0.228270412642669,
      "grad_norm": 0.0254242904484272,
      "learning_rate": 4.4293239683933275e-05,
      "loss": 0.0008,
      "step": 780
    },
    {
      "epoch": 0.22856306701785192,
      "grad_norm": 0.7936460971832275,
      "learning_rate": 4.42859233245537e-05,
      "loss": 0.0046,
      "step": 781
    },
    {
      "epoch": 0.22885572139303484,
      "grad_norm": 0.050249021500349045,
      "learning_rate": 4.427860696517413e-05,
      "loss": 0.001,
      "step": 782
    },
    {
      "epoch": 0.22914837576821773,
      "grad_norm": 0.08169472962617874,
      "learning_rate": 4.427129060579456e-05,
      "loss": 0.0012,
      "step": 783
    },
    {
      "epoch": 0.22944103014340064,
      "grad_norm": 6.499297618865967,
      "learning_rate": 4.426397424641499e-05,
      "loss": 0.0624,
      "step": 784
    },
    {
      "epoch": 0.22973368451858356,
      "grad_norm": 0.02524263970553875,
      "learning_rate": 4.4256657887035415e-05,
      "loss": 0.0005,
      "step": 785
    },
    {
      "epoch": 0.23002633889376647,
      "grad_norm": 5.125154495239258,
      "learning_rate": 4.424934152765584e-05,
      "loss": 0.0226,
      "step": 786
    },
    {
      "epoch": 0.23031899326894936,
      "grad_norm": 6.894948482513428,
      "learning_rate": 4.424202516827627e-05,
      "loss": 0.0668,
      "step": 787
    },
    {
      "epoch": 0.23061164764413228,
      "grad_norm": 5.20366096496582,
      "learning_rate": 4.42347088088967e-05,
      "loss": 0.0671,
      "step": 788
    },
    {
      "epoch": 0.2309043020193152,
      "grad_norm": 0.05931296572089195,
      "learning_rate": 4.422739244951713e-05,
      "loss": 0.0009,
      "step": 789
    },
    {
      "epoch": 0.2311969563944981,
      "grad_norm": 0.00888920109719038,
      "learning_rate": 4.4220076090137555e-05,
      "loss": 0.0003,
      "step": 790
    },
    {
      "epoch": 0.231489610769681,
      "grad_norm": 0.012214032001793385,
      "learning_rate": 4.4212759730757976e-05,
      "loss": 0.0003,
      "step": 791
    },
    {
      "epoch": 0.2317822651448639,
      "grad_norm": 0.01478834543377161,
      "learning_rate": 4.4205443371378404e-05,
      "loss": 0.0004,
      "step": 792
    },
    {
      "epoch": 0.23207491952004683,
      "grad_norm": 3.1242382526397705,
      "learning_rate": 4.419812701199883e-05,
      "loss": 0.0407,
      "step": 793
    },
    {
      "epoch": 0.23236757389522975,
      "grad_norm": 0.013407468795776367,
      "learning_rate": 4.419081065261926e-05,
      "loss": 0.0003,
      "step": 794
    },
    {
      "epoch": 0.23266022827041263,
      "grad_norm": 0.014789867214858532,
      "learning_rate": 4.418349429323969e-05,
      "loss": 0.0004,
      "step": 795
    },
    {
      "epoch": 0.23295288264559555,
      "grad_norm": 0.01953650824725628,
      "learning_rate": 4.4176177933860115e-05,
      "loss": 0.0004,
      "step": 796
    },
    {
      "epoch": 0.23324553702077847,
      "grad_norm": 0.03375176712870598,
      "learning_rate": 4.4168861574480543e-05,
      "loss": 0.0008,
      "step": 797
    },
    {
      "epoch": 0.23353819139596138,
      "grad_norm": 0.05466015264391899,
      "learning_rate": 4.416154521510097e-05,
      "loss": 0.0009,
      "step": 798
    },
    {
      "epoch": 0.23383084577114427,
      "grad_norm": 0.00844351015985012,
      "learning_rate": 4.41542288557214e-05,
      "loss": 0.0002,
      "step": 799
    },
    {
      "epoch": 0.23412350014632718,
      "grad_norm": 0.008441613055765629,
      "learning_rate": 4.414691249634183e-05,
      "loss": 0.0003,
      "step": 800
    },
    {
      "epoch": 0.2344161545215101,
      "grad_norm": 0.048100389540195465,
      "learning_rate": 4.4139596136962255e-05,
      "loss": 0.0008,
      "step": 801
    },
    {
      "epoch": 0.23470880889669302,
      "grad_norm": 6.122220516204834,
      "learning_rate": 4.4132279777582676e-05,
      "loss": 0.0348,
      "step": 802
    },
    {
      "epoch": 0.2350014632718759,
      "grad_norm": 0.006385037675499916,
      "learning_rate": 4.4124963418203104e-05,
      "loss": 0.0002,
      "step": 803
    },
    {
      "epoch": 0.23529411764705882,
      "grad_norm": 0.019083580002188683,
      "learning_rate": 4.411764705882353e-05,
      "loss": 0.0003,
      "step": 804
    },
    {
      "epoch": 0.23558677202224174,
      "grad_norm": 0.01814999058842659,
      "learning_rate": 4.411033069944396e-05,
      "loss": 0.0004,
      "step": 805
    },
    {
      "epoch": 0.23587942639742465,
      "grad_norm": 5.577958583831787,
      "learning_rate": 4.410301434006439e-05,
      "loss": 0.0333,
      "step": 806
    },
    {
      "epoch": 0.23617208077260754,
      "grad_norm": 2.3570194244384766,
      "learning_rate": 4.4095697980684816e-05,
      "loss": 0.0099,
      "step": 807
    },
    {
      "epoch": 0.23646473514779046,
      "grad_norm": 0.007592096459120512,
      "learning_rate": 4.4088381621305244e-05,
      "loss": 0.0002,
      "step": 808
    },
    {
      "epoch": 0.23675738952297337,
      "grad_norm": 1.1854114532470703,
      "learning_rate": 4.408106526192567e-05,
      "loss": 0.0053,
      "step": 809
    },
    {
      "epoch": 0.2370500438981563,
      "grad_norm": 0.4952651560306549,
      "learning_rate": 4.40737489025461e-05,
      "loss": 0.0022,
      "step": 810
    },
    {
      "epoch": 0.23734269827333918,
      "grad_norm": 0.014262191019952297,
      "learning_rate": 4.406643254316653e-05,
      "loss": 0.0003,
      "step": 811
    },
    {
      "epoch": 0.2376353526485221,
      "grad_norm": 0.02860407717525959,
      "learning_rate": 4.405911618378695e-05,
      "loss": 0.0005,
      "step": 812
    },
    {
      "epoch": 0.237928007023705,
      "grad_norm": 1.1772172451019287,
      "learning_rate": 4.405179982440738e-05,
      "loss": 0.0047,
      "step": 813
    },
    {
      "epoch": 0.23822066139888792,
      "grad_norm": 4.126893043518066,
      "learning_rate": 4.4044483465027805e-05,
      "loss": 0.0133,
      "step": 814
    },
    {
      "epoch": 0.2385133157740708,
      "grad_norm": 4.438233852386475,
      "learning_rate": 4.403716710564823e-05,
      "loss": 0.1691,
      "step": 815
    },
    {
      "epoch": 0.23880597014925373,
      "grad_norm": 1.1588010787963867,
      "learning_rate": 4.402985074626866e-05,
      "loss": 0.0035,
      "step": 816
    },
    {
      "epoch": 0.23909862452443664,
      "grad_norm": 0.06502325087785721,
      "learning_rate": 4.402253438688909e-05,
      "loss": 0.0006,
      "step": 817
    },
    {
      "epoch": 0.23939127889961956,
      "grad_norm": 0.0038198577240109444,
      "learning_rate": 4.401521802750952e-05,
      "loss": 0.0001,
      "step": 818
    },
    {
      "epoch": 0.23968393327480245,
      "grad_norm": 0.004416614770889282,
      "learning_rate": 4.4007901668129945e-05,
      "loss": 0.0001,
      "step": 819
    },
    {
      "epoch": 0.23997658764998536,
      "grad_norm": 3.8089776039123535,
      "learning_rate": 4.400058530875037e-05,
      "loss": 0.1348,
      "step": 820
    },
    {
      "epoch": 0.24026924202516828,
      "grad_norm": 0.49052879214286804,
      "learning_rate": 4.3993268949370794e-05,
      "loss": 0.0011,
      "step": 821
    },
    {
      "epoch": 0.2405618964003512,
      "grad_norm": 7.548944473266602,
      "learning_rate": 4.398595258999122e-05,
      "loss": 0.058,
      "step": 822
    },
    {
      "epoch": 0.24085455077553408,
      "grad_norm": 0.008259841240942478,
      "learning_rate": 4.397863623061165e-05,
      "loss": 0.0002,
      "step": 823
    },
    {
      "epoch": 0.241147205150717,
      "grad_norm": 0.049051620066165924,
      "learning_rate": 4.397131987123208e-05,
      "loss": 0.0008,
      "step": 824
    },
    {
      "epoch": 0.2414398595258999,
      "grad_norm": 0.03358924388885498,
      "learning_rate": 4.3964003511852506e-05,
      "loss": 0.0007,
      "step": 825
    },
    {
      "epoch": 0.24173251390108283,
      "grad_norm": 0.38548314571380615,
      "learning_rate": 4.3956687152472934e-05,
      "loss": 0.0027,
      "step": 826
    },
    {
      "epoch": 0.24202516827626572,
      "grad_norm": 0.1643989384174347,
      "learning_rate": 4.394937079309336e-05,
      "loss": 0.0023,
      "step": 827
    },
    {
      "epoch": 0.24231782265144863,
      "grad_norm": 0.02894510142505169,
      "learning_rate": 4.394205443371379e-05,
      "loss": 0.0005,
      "step": 828
    },
    {
      "epoch": 0.24261047702663155,
      "grad_norm": 0.06682289391756058,
      "learning_rate": 4.393473807433421e-05,
      "loss": 0.0009,
      "step": 829
    },
    {
      "epoch": 0.24290313140181446,
      "grad_norm": 1.2807080745697021,
      "learning_rate": 4.392742171495464e-05,
      "loss": 0.0079,
      "step": 830
    },
    {
      "epoch": 0.24319578577699735,
      "grad_norm": 0.08425843715667725,
      "learning_rate": 4.392010535557507e-05,
      "loss": 0.0005,
      "step": 831
    },
    {
      "epoch": 0.24348844015218027,
      "grad_norm": 0.10579044371843338,
      "learning_rate": 4.3912788996195495e-05,
      "loss": 0.0011,
      "step": 832
    },
    {
      "epoch": 0.24378109452736318,
      "grad_norm": 0.05214914306998253,
      "learning_rate": 4.390547263681592e-05,
      "loss": 0.0006,
      "step": 833
    },
    {
      "epoch": 0.2440737489025461,
      "grad_norm": 0.014113985002040863,
      "learning_rate": 4.389815627743635e-05,
      "loss": 0.0003,
      "step": 834
    },
    {
      "epoch": 0.244366403277729,
      "grad_norm": 0.004227771423757076,
      "learning_rate": 4.389083991805678e-05,
      "loss": 0.0001,
      "step": 835
    },
    {
      "epoch": 0.2446590576529119,
      "grad_norm": 0.005914884619414806,
      "learning_rate": 4.3883523558677206e-05,
      "loss": 0.0001,
      "step": 836
    },
    {
      "epoch": 0.24495171202809482,
      "grad_norm": 0.004017166327685118,
      "learning_rate": 4.387620719929763e-05,
      "loss": 0.0001,
      "step": 837
    },
    {
      "epoch": 0.24524436640327774,
      "grad_norm": 0.003356066532433033,
      "learning_rate": 4.3868890839918056e-05,
      "loss": 0.0001,
      "step": 838
    },
    {
      "epoch": 0.24553702077846065,
      "grad_norm": 2.8616511821746826,
      "learning_rate": 4.3861574480538483e-05,
      "loss": 0.0039,
      "step": 839
    },
    {
      "epoch": 0.24582967515364354,
      "grad_norm": 0.2694534957408905,
      "learning_rate": 4.385425812115891e-05,
      "loss": 0.001,
      "step": 840
    },
    {
      "epoch": 0.24612232952882646,
      "grad_norm": 0.003960182890295982,
      "learning_rate": 4.384694176177934e-05,
      "loss": 0.0001,
      "step": 841
    },
    {
      "epoch": 0.24641498390400937,
      "grad_norm": 0.1128448098897934,
      "learning_rate": 4.383962540239977e-05,
      "loss": 0.0004,
      "step": 842
    },
    {
      "epoch": 0.2467076382791923,
      "grad_norm": 0.0031736004166305065,
      "learning_rate": 4.3832309043020195e-05,
      "loss": 0.0001,
      "step": 843
    },
    {
      "epoch": 0.24700029265437518,
      "grad_norm": 0.011300680227577686,
      "learning_rate": 4.382499268364062e-05,
      "loss": 0.0002,
      "step": 844
    },
    {
      "epoch": 0.2472929470295581,
      "grad_norm": 0.006444556172937155,
      "learning_rate": 4.3817676324261044e-05,
      "loss": 0.0001,
      "step": 845
    },
    {
      "epoch": 0.247585601404741,
      "grad_norm": 0.07491226494312286,
      "learning_rate": 4.381035996488147e-05,
      "loss": 0.0006,
      "step": 846
    },
    {
      "epoch": 0.24787825577992392,
      "grad_norm": 0.0015734657645225525,
      "learning_rate": 4.38030436055019e-05,
      "loss": 0.0,
      "step": 847
    },
    {
      "epoch": 0.2481709101551068,
      "grad_norm": 3.5701217651367188,
      "learning_rate": 4.379572724612233e-05,
      "loss": 0.2921,
      "step": 848
    },
    {
      "epoch": 0.24846356453028973,
      "grad_norm": 0.04214177653193474,
      "learning_rate": 4.3788410886742756e-05,
      "loss": 0.0003,
      "step": 849
    },
    {
      "epoch": 0.24875621890547264,
      "grad_norm": 12.9547119140625,
      "learning_rate": 4.3781094527363184e-05,
      "loss": 0.0718,
      "step": 850
    },
    {
      "epoch": 0.24904887328065556,
      "grad_norm": 0.034693583846092224,
      "learning_rate": 4.377377816798361e-05,
      "loss": 0.0003,
      "step": 851
    },
    {
      "epoch": 0.24934152765583845,
      "grad_norm": 0.028678277507424355,
      "learning_rate": 4.376646180860404e-05,
      "loss": 0.0004,
      "step": 852
    },
    {
      "epoch": 0.24963418203102136,
      "grad_norm": 0.05104019492864609,
      "learning_rate": 4.375914544922447e-05,
      "loss": 0.0006,
      "step": 853
    },
    {
      "epoch": 0.24992683640620428,
      "grad_norm": 0.3913678824901581,
      "learning_rate": 4.3751829089844896e-05,
      "loss": 0.0018,
      "step": 854
    },
    {
      "epoch": 0.25021949078138717,
      "grad_norm": 0.0270369965583086,
      "learning_rate": 4.374451273046532e-05,
      "loss": 0.0004,
      "step": 855
    },
    {
      "epoch": 0.2505121451565701,
      "grad_norm": 0.012402846477925777,
      "learning_rate": 4.3737196371085745e-05,
      "loss": 0.0002,
      "step": 856
    },
    {
      "epoch": 0.250804799531753,
      "grad_norm": 0.015872538089752197,
      "learning_rate": 4.372988001170617e-05,
      "loss": 0.0003,
      "step": 857
    },
    {
      "epoch": 0.2510974539069359,
      "grad_norm": 0.007675354368984699,
      "learning_rate": 4.37225636523266e-05,
      "loss": 0.0001,
      "step": 858
    },
    {
      "epoch": 0.25139010828211883,
      "grad_norm": 0.005604118574410677,
      "learning_rate": 4.371524729294703e-05,
      "loss": 0.0001,
      "step": 859
    },
    {
      "epoch": 0.2516827626573017,
      "grad_norm": 0.011989112012088299,
      "learning_rate": 4.370793093356746e-05,
      "loss": 0.0003,
      "step": 860
    },
    {
      "epoch": 0.25197541703248466,
      "grad_norm": 0.005313977133482695,
      "learning_rate": 4.3700614574187885e-05,
      "loss": 0.0001,
      "step": 861
    },
    {
      "epoch": 0.25226807140766755,
      "grad_norm": 0.022295720875263214,
      "learning_rate": 4.369329821480831e-05,
      "loss": 0.0004,
      "step": 862
    },
    {
      "epoch": 0.25256072578285044,
      "grad_norm": 0.0032608299516141415,
      "learning_rate": 4.368598185542874e-05,
      "loss": 0.0001,
      "step": 863
    },
    {
      "epoch": 0.2528533801580334,
      "grad_norm": 13.01710033416748,
      "learning_rate": 4.367866549604917e-05,
      "loss": 0.0695,
      "step": 864
    },
    {
      "epoch": 0.25314603453321627,
      "grad_norm": 0.022410310804843903,
      "learning_rate": 4.367134913666959e-05,
      "loss": 0.0002,
      "step": 865
    },
    {
      "epoch": 0.25343868890839916,
      "grad_norm": 0.008261977694928646,
      "learning_rate": 4.366403277729002e-05,
      "loss": 0.0002,
      "step": 866
    },
    {
      "epoch": 0.2537313432835821,
      "grad_norm": 0.1107669249176979,
      "learning_rate": 4.3656716417910446e-05,
      "loss": 0.0005,
      "step": 867
    },
    {
      "epoch": 0.254023997658765,
      "grad_norm": 0.01423464436084032,
      "learning_rate": 4.3649400058530874e-05,
      "loss": 0.0003,
      "step": 868
    },
    {
      "epoch": 0.25431665203394793,
      "grad_norm": 0.015357548370957375,
      "learning_rate": 4.36420836991513e-05,
      "loss": 0.0003,
      "step": 869
    },
    {
      "epoch": 0.2546093064091308,
      "grad_norm": 0.006338243838399649,
      "learning_rate": 4.363476733977173e-05,
      "loss": 0.0001,
      "step": 870
    },
    {
      "epoch": 0.2549019607843137,
      "grad_norm": 0.012789924629032612,
      "learning_rate": 4.362745098039216e-05,
      "loss": 0.0002,
      "step": 871
    },
    {
      "epoch": 0.25519461515949665,
      "grad_norm": 0.006994856055825949,
      "learning_rate": 4.3620134621012586e-05,
      "loss": 0.0001,
      "step": 872
    },
    {
      "epoch": 0.25548726953467954,
      "grad_norm": 6.519273281097412,
      "learning_rate": 4.3612818261633013e-05,
      "loss": 0.1762,
      "step": 873
    },
    {
      "epoch": 0.25577992390986243,
      "grad_norm": 10.568449974060059,
      "learning_rate": 4.360550190225344e-05,
      "loss": 0.0666,
      "step": 874
    },
    {
      "epoch": 0.25607257828504537,
      "grad_norm": 0.27735745906829834,
      "learning_rate": 4.359818554287387e-05,
      "loss": 0.0008,
      "step": 875
    },
    {
      "epoch": 0.25636523266022826,
      "grad_norm": 1.7639248371124268,
      "learning_rate": 4.359086918349429e-05,
      "loss": 0.0049,
      "step": 876
    },
    {
      "epoch": 0.2566578870354112,
      "grad_norm": 0.015343848615884781,
      "learning_rate": 4.358355282411472e-05,
      "loss": 0.0001,
      "step": 877
    },
    {
      "epoch": 0.2569505414105941,
      "grad_norm": 0.010843094438314438,
      "learning_rate": 4.3576236464735146e-05,
      "loss": 0.0001,
      "step": 878
    },
    {
      "epoch": 0.257243195785777,
      "grad_norm": 1.0062090158462524,
      "learning_rate": 4.3568920105355574e-05,
      "loss": 0.0035,
      "step": 879
    },
    {
      "epoch": 0.2575358501609599,
      "grad_norm": 0.0034330443013459444,
      "learning_rate": 4.3561603745976e-05,
      "loss": 0.0001,
      "step": 880
    },
    {
      "epoch": 0.2578285045361428,
      "grad_norm": 0.02194824256002903,
      "learning_rate": 4.355428738659643e-05,
      "loss": 0.0002,
      "step": 881
    },
    {
      "epoch": 0.2581211589113257,
      "grad_norm": 0.9777836203575134,
      "learning_rate": 4.354697102721686e-05,
      "loss": 0.0039,
      "step": 882
    },
    {
      "epoch": 0.25841381328650864,
      "grad_norm": 0.003030191408470273,
      "learning_rate": 4.3539654667837286e-05,
      "loss": 0.0001,
      "step": 883
    },
    {
      "epoch": 0.25870646766169153,
      "grad_norm": 0.0041368212550878525,
      "learning_rate": 4.3532338308457714e-05,
      "loss": 0.0001,
      "step": 884
    },
    {
      "epoch": 0.2589991220368745,
      "grad_norm": 0.011940663680434227,
      "learning_rate": 4.352502194907814e-05,
      "loss": 0.0002,
      "step": 885
    },
    {
      "epoch": 0.25929177641205736,
      "grad_norm": 0.0035515271592885256,
      "learning_rate": 4.351770558969857e-05,
      "loss": 0.0001,
      "step": 886
    },
    {
      "epoch": 0.25958443078724025,
      "grad_norm": 5.958016395568848,
      "learning_rate": 4.351038923031899e-05,
      "loss": 0.1927,
      "step": 887
    },
    {
      "epoch": 0.2598770851624232,
      "grad_norm": 0.012839854694902897,
      "learning_rate": 4.350307287093942e-05,
      "loss": 0.0002,
      "step": 888
    },
    {
      "epoch": 0.2601697395376061,
      "grad_norm": 0.02360195852816105,
      "learning_rate": 4.349575651155985e-05,
      "loss": 0.0004,
      "step": 889
    },
    {
      "epoch": 0.26046239391278897,
      "grad_norm": 0.008355548605322838,
      "learning_rate": 4.3488440152180275e-05,
      "loss": 0.0002,
      "step": 890
    },
    {
      "epoch": 0.2607550482879719,
      "grad_norm": 0.010975964367389679,
      "learning_rate": 4.34811237928007e-05,
      "loss": 0.0002,
      "step": 891
    },
    {
      "epoch": 0.2610477026631548,
      "grad_norm": 0.005606422666460276,
      "learning_rate": 4.347380743342113e-05,
      "loss": 0.0001,
      "step": 892
    },
    {
      "epoch": 0.26134035703833774,
      "grad_norm": 0.0077472287230193615,
      "learning_rate": 4.346649107404156e-05,
      "loss": 0.0001,
      "step": 893
    },
    {
      "epoch": 0.26163301141352063,
      "grad_norm": 0.016559531912207603,
      "learning_rate": 4.345917471466199e-05,
      "loss": 0.0002,
      "step": 894
    },
    {
      "epoch": 0.2619256657887035,
      "grad_norm": 0.005465401802212,
      "learning_rate": 4.3451858355282415e-05,
      "loss": 0.0001,
      "step": 895
    },
    {
      "epoch": 0.26221832016388646,
      "grad_norm": 0.011149146594107151,
      "learning_rate": 4.344454199590284e-05,
      "loss": 0.0001,
      "step": 896
    },
    {
      "epoch": 0.26251097453906935,
      "grad_norm": 0.020837359130382538,
      "learning_rate": 4.3437225636523264e-05,
      "loss": 0.0002,
      "step": 897
    },
    {
      "epoch": 0.26280362891425224,
      "grad_norm": 9.050426483154297,
      "learning_rate": 4.342990927714369e-05,
      "loss": 0.024,
      "step": 898
    },
    {
      "epoch": 0.2630962832894352,
      "grad_norm": 0.4160962700843811,
      "learning_rate": 4.342259291776412e-05,
      "loss": 0.0014,
      "step": 899
    },
    {
      "epoch": 0.2633889376646181,
      "grad_norm": 0.048738472163677216,
      "learning_rate": 4.341527655838455e-05,
      "loss": 0.0004,
      "step": 900
    },
    {
      "epoch": 0.263681592039801,
      "grad_norm": 0.007950617000460625,
      "learning_rate": 4.3407960199004976e-05,
      "loss": 0.0002,
      "step": 901
    },
    {
      "epoch": 0.2639742464149839,
      "grad_norm": 0.036539409309625626,
      "learning_rate": 4.3400643839625404e-05,
      "loss": 0.0004,
      "step": 902
    },
    {
      "epoch": 0.2642669007901668,
      "grad_norm": 0.34213629364967346,
      "learning_rate": 4.339332748024583e-05,
      "loss": 0.0013,
      "step": 903
    },
    {
      "epoch": 0.26455955516534974,
      "grad_norm": 12.875584602355957,
      "learning_rate": 4.338601112086626e-05,
      "loss": 0.0282,
      "step": 904
    },
    {
      "epoch": 0.2648522095405326,
      "grad_norm": 1.863190770149231,
      "learning_rate": 4.337869476148669e-05,
      "loss": 0.0067,
      "step": 905
    },
    {
      "epoch": 0.26514486391571557,
      "grad_norm": 0.0017428912688046694,
      "learning_rate": 4.3371378402107115e-05,
      "loss": 0.0,
      "step": 906
    },
    {
      "epoch": 0.26543751829089846,
      "grad_norm": 0.0031504349317401648,
      "learning_rate": 4.3364062042727543e-05,
      "loss": 0.0,
      "step": 907
    },
    {
      "epoch": 0.26573017266608134,
      "grad_norm": 0.04455633834004402,
      "learning_rate": 4.3356745683347965e-05,
      "loss": 0.0003,
      "step": 908
    },
    {
      "epoch": 0.2660228270412643,
      "grad_norm": 0.27286088466644287,
      "learning_rate": 4.334942932396839e-05,
      "loss": 0.0018,
      "step": 909
    },
    {
      "epoch": 0.2663154814164472,
      "grad_norm": 6.610106468200684,
      "learning_rate": 4.334211296458882e-05,
      "loss": 0.045,
      "step": 910
    },
    {
      "epoch": 0.26660813579163006,
      "grad_norm": 0.0290259700268507,
      "learning_rate": 4.333479660520925e-05,
      "loss": 0.0003,
      "step": 911
    },
    {
      "epoch": 0.266900790166813,
      "grad_norm": 0.006859095301479101,
      "learning_rate": 4.3327480245829676e-05,
      "loss": 0.0001,
      "step": 912
    },
    {
      "epoch": 0.2671934445419959,
      "grad_norm": 0.007041125558316708,
      "learning_rate": 4.3320163886450104e-05,
      "loss": 0.0001,
      "step": 913
    },
    {
      "epoch": 0.26748609891717884,
      "grad_norm": 0.001733655110001564,
      "learning_rate": 4.331284752707053e-05,
      "loss": 0.0,
      "step": 914
    },
    {
      "epoch": 0.2677787532923617,
      "grad_norm": 0.005434884224087,
      "learning_rate": 4.330553116769096e-05,
      "loss": 0.0001,
      "step": 915
    },
    {
      "epoch": 0.2680714076675446,
      "grad_norm": 16.0704345703125,
      "learning_rate": 4.329821480831139e-05,
      "loss": 0.0313,
      "step": 916
    },
    {
      "epoch": 0.26836406204272756,
      "grad_norm": 0.001067645032890141,
      "learning_rate": 4.3290898448931816e-05,
      "loss": 0.0,
      "step": 917
    },
    {
      "epoch": 0.26865671641791045,
      "grad_norm": 0.0016751672374084592,
      "learning_rate": 4.328358208955224e-05,
      "loss": 0.0,
      "step": 918
    },
    {
      "epoch": 0.26894937079309333,
      "grad_norm": 0.01004913542419672,
      "learning_rate": 4.3276265730172665e-05,
      "loss": 0.0001,
      "step": 919
    },
    {
      "epoch": 0.2692420251682763,
      "grad_norm": 0.004952297545969486,
      "learning_rate": 4.326894937079309e-05,
      "loss": 0.0001,
      "step": 920
    },
    {
      "epoch": 0.26953467954345917,
      "grad_norm": 14.358283996582031,
      "learning_rate": 4.326163301141352e-05,
      "loss": 0.1519,
      "step": 921
    },
    {
      "epoch": 0.2698273339186421,
      "grad_norm": 12.556432723999023,
      "learning_rate": 4.325431665203395e-05,
      "loss": 0.0425,
      "step": 922
    },
    {
      "epoch": 0.270119988293825,
      "grad_norm": 0.0035831343848258257,
      "learning_rate": 4.324700029265438e-05,
      "loss": 0.0001,
      "step": 923
    },
    {
      "epoch": 0.2704126426690079,
      "grad_norm": 0.002111797221004963,
      "learning_rate": 4.3239683933274805e-05,
      "loss": 0.0,
      "step": 924
    },
    {
      "epoch": 0.27070529704419083,
      "grad_norm": 0.011011890135705471,
      "learning_rate": 4.323236757389523e-05,
      "loss": 0.0001,
      "step": 925
    },
    {
      "epoch": 0.2709979514193737,
      "grad_norm": 0.0017403267556801438,
      "learning_rate": 4.322505121451566e-05,
      "loss": 0.0,
      "step": 926
    },
    {
      "epoch": 0.2712906057945566,
      "grad_norm": 0.041412338614463806,
      "learning_rate": 4.321773485513609e-05,
      "loss": 0.0002,
      "step": 927
    },
    {
      "epoch": 0.27158326016973955,
      "grad_norm": 0.014695284888148308,
      "learning_rate": 4.321041849575652e-05,
      "loss": 0.0002,
      "step": 928
    },
    {
      "epoch": 0.27187591454492244,
      "grad_norm": 11.386469841003418,
      "learning_rate": 4.320310213637694e-05,
      "loss": 0.0317,
      "step": 929
    },
    {
      "epoch": 0.2721685689201054,
      "grad_norm": 3.229830503463745,
      "learning_rate": 4.3195785776997366e-05,
      "loss": 0.3326,
      "step": 930
    },
    {
      "epoch": 0.27246122329528827,
      "grad_norm": 0.0017760074697434902,
      "learning_rate": 4.3188469417617794e-05,
      "loss": 0.0,
      "step": 931
    },
    {
      "epoch": 0.27275387767047116,
      "grad_norm": 0.014468281529843807,
      "learning_rate": 4.318115305823822e-05,
      "loss": 0.0001,
      "step": 932
    },
    {
      "epoch": 0.2730465320456541,
      "grad_norm": 0.0017373122973367572,
      "learning_rate": 4.317383669885865e-05,
      "loss": 0.0,
      "step": 933
    },
    {
      "epoch": 0.273339186420837,
      "grad_norm": 0.008035775274038315,
      "learning_rate": 4.316652033947908e-05,
      "loss": 0.0001,
      "step": 934
    },
    {
      "epoch": 0.2736318407960199,
      "grad_norm": 0.045467838644981384,
      "learning_rate": 4.3159203980099506e-05,
      "loss": 0.0003,
      "step": 935
    },
    {
      "epoch": 0.2739244951712028,
      "grad_norm": 0.23960766196250916,
      "learning_rate": 4.3151887620719934e-05,
      "loss": 0.0009,
      "step": 936
    },
    {
      "epoch": 0.2742171495463857,
      "grad_norm": 0.42729949951171875,
      "learning_rate": 4.314457126134036e-05,
      "loss": 0.001,
      "step": 937
    },
    {
      "epoch": 0.27450980392156865,
      "grad_norm": 3.776639699935913,
      "learning_rate": 4.313725490196079e-05,
      "loss": 0.0164,
      "step": 938
    },
    {
      "epoch": 0.27480245829675154,
      "grad_norm": 0.010938864201307297,
      "learning_rate": 4.312993854258122e-05,
      "loss": 0.0002,
      "step": 939
    },
    {
      "epoch": 0.2750951126719344,
      "grad_norm": 0.025770241394639015,
      "learning_rate": 4.312262218320164e-05,
      "loss": 0.0002,
      "step": 940
    },
    {
      "epoch": 0.27538776704711737,
      "grad_norm": 0.03667419031262398,
      "learning_rate": 4.311530582382207e-05,
      "loss": 0.0003,
      "step": 941
    },
    {
      "epoch": 0.27568042142230026,
      "grad_norm": 5.51930570602417,
      "learning_rate": 4.3107989464442495e-05,
      "loss": 0.1458,
      "step": 942
    },
    {
      "epoch": 0.27597307579748315,
      "grad_norm": 5.226438045501709,
      "learning_rate": 4.310067310506292e-05,
      "loss": 0.0088,
      "step": 943
    },
    {
      "epoch": 0.2762657301726661,
      "grad_norm": 0.23687642812728882,
      "learning_rate": 4.309335674568335e-05,
      "loss": 0.0007,
      "step": 944
    },
    {
      "epoch": 0.276558384547849,
      "grad_norm": 0.021837415173649788,
      "learning_rate": 4.308604038630378e-05,
      "loss": 0.0004,
      "step": 945
    },
    {
      "epoch": 0.2768510389230319,
      "grad_norm": 2.7103466987609863,
      "learning_rate": 4.3078724026924206e-05,
      "loss": 0.0068,
      "step": 946
    },
    {
      "epoch": 0.2771436932982148,
      "grad_norm": 12.31233024597168,
      "learning_rate": 4.3071407667544634e-05,
      "loss": 0.0343,
      "step": 947
    },
    {
      "epoch": 0.2774363476733977,
      "grad_norm": 4.28306770324707,
      "learning_rate": 4.306409130816506e-05,
      "loss": 0.0128,
      "step": 948
    },
    {
      "epoch": 0.27772900204858064,
      "grad_norm": 0.038289640098810196,
      "learning_rate": 4.305677494878549e-05,
      "loss": 0.0008,
      "step": 949
    },
    {
      "epoch": 0.27802165642376353,
      "grad_norm": 0.031705744564533234,
      "learning_rate": 4.304945858940591e-05,
      "loss": 0.0006,
      "step": 950
    },
    {
      "epoch": 0.2783143107989464,
      "grad_norm": 0.027836626395583153,
      "learning_rate": 4.304214223002634e-05,
      "loss": 0.0006,
      "step": 951
    },
    {
      "epoch": 0.27860696517412936,
      "grad_norm": 0.013349486514925957,
      "learning_rate": 4.303482587064677e-05,
      "loss": 0.0003,
      "step": 952
    },
    {
      "epoch": 0.27889961954931225,
      "grad_norm": 0.010819119401276112,
      "learning_rate": 4.3027509511267195e-05,
      "loss": 0.0002,
      "step": 953
    },
    {
      "epoch": 0.2791922739244952,
      "grad_norm": 0.011443225666880608,
      "learning_rate": 4.302019315188762e-05,
      "loss": 0.0002,
      "step": 954
    },
    {
      "epoch": 0.2794849282996781,
      "grad_norm": 8.645218849182129,
      "learning_rate": 4.301287679250805e-05,
      "loss": 0.0937,
      "step": 955
    },
    {
      "epoch": 0.27977758267486097,
      "grad_norm": 0.011645263060927391,
      "learning_rate": 4.300556043312848e-05,
      "loss": 0.0002,
      "step": 956
    },
    {
      "epoch": 0.2800702370500439,
      "grad_norm": 0.012442460283637047,
      "learning_rate": 4.299824407374891e-05,
      "loss": 0.0002,
      "step": 957
    },
    {
      "epoch": 0.2803628914252268,
      "grad_norm": 24.188045501708984,
      "learning_rate": 4.2990927714369335e-05,
      "loss": 0.0763,
      "step": 958
    },
    {
      "epoch": 0.2806555458004097,
      "grad_norm": 0.016478197649121284,
      "learning_rate": 4.298361135498976e-05,
      "loss": 0.0003,
      "step": 959
    },
    {
      "epoch": 0.28094820017559263,
      "grad_norm": 0.04385501518845558,
      "learning_rate": 4.297629499561019e-05,
      "loss": 0.0005,
      "step": 960
    },
    {
      "epoch": 0.2812408545507755,
      "grad_norm": 0.009579029865562916,
      "learning_rate": 4.296897863623061e-05,
      "loss": 0.0002,
      "step": 961
    },
    {
      "epoch": 0.28153350892595846,
      "grad_norm": 0.009482062421739101,
      "learning_rate": 4.296166227685104e-05,
      "loss": 0.0002,
      "step": 962
    },
    {
      "epoch": 0.28182616330114135,
      "grad_norm": 0.018784578889608383,
      "learning_rate": 4.295434591747147e-05,
      "loss": 0.0003,
      "step": 963
    },
    {
      "epoch": 0.28211881767632424,
      "grad_norm": 0.013786012306809425,
      "learning_rate": 4.2947029558091896e-05,
      "loss": 0.0003,
      "step": 964
    },
    {
      "epoch": 0.2824114720515072,
      "grad_norm": 0.07157467305660248,
      "learning_rate": 4.2939713198712324e-05,
      "loss": 0.0011,
      "step": 965
    },
    {
      "epoch": 0.28270412642669007,
      "grad_norm": 0.007835990749299526,
      "learning_rate": 4.293239683933275e-05,
      "loss": 0.0002,
      "step": 966
    },
    {
      "epoch": 0.28299678080187296,
      "grad_norm": 0.01569865457713604,
      "learning_rate": 4.292508047995318e-05,
      "loss": 0.0003,
      "step": 967
    },
    {
      "epoch": 0.2832894351770559,
      "grad_norm": 0.013751539401710033,
      "learning_rate": 4.291776412057361e-05,
      "loss": 0.0003,
      "step": 968
    },
    {
      "epoch": 0.2835820895522388,
      "grad_norm": 0.01229290384799242,
      "learning_rate": 4.2910447761194036e-05,
      "loss": 0.0002,
      "step": 969
    },
    {
      "epoch": 0.28387474392742174,
      "grad_norm": 0.012345947325229645,
      "learning_rate": 4.2903131401814464e-05,
      "loss": 0.0003,
      "step": 970
    },
    {
      "epoch": 0.2841673983026046,
      "grad_norm": 0.010408922098577023,
      "learning_rate": 4.2895815042434885e-05,
      "loss": 0.0002,
      "step": 971
    },
    {
      "epoch": 0.2844600526777875,
      "grad_norm": 0.00517492787912488,
      "learning_rate": 4.288849868305531e-05,
      "loss": 0.0001,
      "step": 972
    },
    {
      "epoch": 0.28475270705297046,
      "grad_norm": 0.02397426962852478,
      "learning_rate": 4.288118232367574e-05,
      "loss": 0.0004,
      "step": 973
    },
    {
      "epoch": 0.28504536142815334,
      "grad_norm": 0.010111522860825062,
      "learning_rate": 4.287386596429617e-05,
      "loss": 0.0002,
      "step": 974
    },
    {
      "epoch": 0.2853380158033363,
      "grad_norm": 5.845407962799072,
      "learning_rate": 4.28665496049166e-05,
      "loss": 0.0151,
      "step": 975
    },
    {
      "epoch": 0.2856306701785192,
      "grad_norm": 0.1198161393404007,
      "learning_rate": 4.2859233245537025e-05,
      "loss": 0.0008,
      "step": 976
    },
    {
      "epoch": 0.28592332455370206,
      "grad_norm": 0.01586102321743965,
      "learning_rate": 4.285191688615745e-05,
      "loss": 0.0003,
      "step": 977
    },
    {
      "epoch": 0.286215978928885,
      "grad_norm": 6.835101127624512,
      "learning_rate": 4.284460052677788e-05,
      "loss": 0.1479,
      "step": 978
    },
    {
      "epoch": 0.2865086333040679,
      "grad_norm": 0.00831079576164484,
      "learning_rate": 4.283728416739831e-05,
      "loss": 0.0001,
      "step": 979
    },
    {
      "epoch": 0.2868012876792508,
      "grad_norm": 0.006643155124038458,
      "learning_rate": 4.2829967808018736e-05,
      "loss": 0.0001,
      "step": 980
    },
    {
      "epoch": 0.2870939420544337,
      "grad_norm": 0.003035582136362791,
      "learning_rate": 4.2822651448639164e-05,
      "loss": 0.0001,
      "step": 981
    },
    {
      "epoch": 0.2873865964296166,
      "grad_norm": 3.952209234237671,
      "learning_rate": 4.2815335089259586e-05,
      "loss": 0.0115,
      "step": 982
    },
    {
      "epoch": 0.28767925080479956,
      "grad_norm": 0.01735353283584118,
      "learning_rate": 4.2808018729880013e-05,
      "loss": 0.0002,
      "step": 983
    },
    {
      "epoch": 0.28797190517998245,
      "grad_norm": 0.004753570072352886,
      "learning_rate": 4.280070237050044e-05,
      "loss": 0.0001,
      "step": 984
    },
    {
      "epoch": 0.28826455955516533,
      "grad_norm": 0.003133102785795927,
      "learning_rate": 4.279338601112087e-05,
      "loss": 0.0001,
      "step": 985
    },
    {
      "epoch": 0.2885572139303483,
      "grad_norm": 0.0429200679063797,
      "learning_rate": 4.27860696517413e-05,
      "loss": 0.0003,
      "step": 986
    },
    {
      "epoch": 0.28884986830553117,
      "grad_norm": 0.006937104742974043,
      "learning_rate": 4.2778753292361725e-05,
      "loss": 0.0001,
      "step": 987
    },
    {
      "epoch": 0.28914252268071405,
      "grad_norm": 0.004405143670737743,
      "learning_rate": 4.277143693298215e-05,
      "loss": 0.0001,
      "step": 988
    },
    {
      "epoch": 0.289435177055897,
      "grad_norm": 0.04413849487900734,
      "learning_rate": 4.276412057360258e-05,
      "loss": 0.0002,
      "step": 989
    },
    {
      "epoch": 0.2897278314310799,
      "grad_norm": 0.0017203919123858213,
      "learning_rate": 4.275680421422301e-05,
      "loss": 0.0,
      "step": 990
    },
    {
      "epoch": 0.29002048580626283,
      "grad_norm": 0.005167375318706036,
      "learning_rate": 4.274948785484344e-05,
      "loss": 0.0001,
      "step": 991
    },
    {
      "epoch": 0.2903131401814457,
      "grad_norm": 0.005177373066544533,
      "learning_rate": 4.2742171495463865e-05,
      "loss": 0.0001,
      "step": 992
    },
    {
      "epoch": 0.2906057945566286,
      "grad_norm": 0.0017759123584255576,
      "learning_rate": 4.2734855136084286e-05,
      "loss": 0.0,
      "step": 993
    },
    {
      "epoch": 0.29089844893181155,
      "grad_norm": 3.8305413722991943,
      "learning_rate": 4.2727538776704714e-05,
      "loss": 0.16,
      "step": 994
    },
    {
      "epoch": 0.29119110330699444,
      "grad_norm": 0.002959401113912463,
      "learning_rate": 4.272022241732514e-05,
      "loss": 0.0001,
      "step": 995
    },
    {
      "epoch": 0.2914837576821773,
      "grad_norm": 7.244873046875,
      "learning_rate": 4.271290605794557e-05,
      "loss": 0.2784,
      "step": 996
    },
    {
      "epoch": 0.29177641205736027,
      "grad_norm": 0.01972855255007744,
      "learning_rate": 4.2705589698566e-05,
      "loss": 0.0003,
      "step": 997
    },
    {
      "epoch": 0.29206906643254316,
      "grad_norm": 0.026749806478619576,
      "learning_rate": 4.2698273339186426e-05,
      "loss": 0.0003,
      "step": 998
    },
    {
      "epoch": 0.2923617208077261,
      "grad_norm": 0.015836404636502266,
      "learning_rate": 4.2690956979806854e-05,
      "loss": 0.0003,
      "step": 999
    },
    {
      "epoch": 0.292654375182909,
      "grad_norm": 0.022398140281438828,
      "learning_rate": 4.268364062042728e-05,
      "loss": 0.0004,
      "step": 1000
    },
    {
      "epoch": 0.2929470295580919,
      "grad_norm": 0.10159856826066971,
      "learning_rate": 4.267632426104771e-05,
      "loss": 0.0015,
      "step": 1001
    },
    {
      "epoch": 0.2932396839332748,
      "grad_norm": 13.095772743225098,
      "learning_rate": 4.266900790166813e-05,
      "loss": 0.1974,
      "step": 1002
    },
    {
      "epoch": 0.2935323383084577,
      "grad_norm": 0.294485867023468,
      "learning_rate": 4.266169154228856e-05,
      "loss": 0.0034,
      "step": 1003
    },
    {
      "epoch": 0.2938249926836406,
      "grad_norm": 0.6832722425460815,
      "learning_rate": 4.265437518290899e-05,
      "loss": 0.0056,
      "step": 1004
    },
    {
      "epoch": 0.29411764705882354,
      "grad_norm": 1.7126188278198242,
      "learning_rate": 4.2647058823529415e-05,
      "loss": 0.0082,
      "step": 1005
    },
    {
      "epoch": 0.2944103014340064,
      "grad_norm": 0.010100563988089561,
      "learning_rate": 4.263974246414984e-05,
      "loss": 0.0002,
      "step": 1006
    },
    {
      "epoch": 0.29470295580918937,
      "grad_norm": 0.010684849694371223,
      "learning_rate": 4.263242610477027e-05,
      "loss": 0.0002,
      "step": 1007
    },
    {
      "epoch": 0.29499561018437226,
      "grad_norm": 0.06480780988931656,
      "learning_rate": 4.26251097453907e-05,
      "loss": 0.0006,
      "step": 1008
    },
    {
      "epoch": 0.29528826455955515,
      "grad_norm": 0.40292733907699585,
      "learning_rate": 4.2617793386011127e-05,
      "loss": 0.0014,
      "step": 1009
    },
    {
      "epoch": 0.2955809189347381,
      "grad_norm": 5.591978073120117,
      "learning_rate": 4.261047702663155e-05,
      "loss": 0.0735,
      "step": 1010
    },
    {
      "epoch": 0.295873573309921,
      "grad_norm": 0.004439515061676502,
      "learning_rate": 4.2603160667251976e-05,
      "loss": 0.0001,
      "step": 1011
    },
    {
      "epoch": 0.29616622768510387,
      "grad_norm": 0.2625431418418884,
      "learning_rate": 4.2595844307872404e-05,
      "loss": 0.0013,
      "step": 1012
    },
    {
      "epoch": 0.2964588820602868,
      "grad_norm": 0.8707738518714905,
      "learning_rate": 4.258852794849283e-05,
      "loss": 0.0023,
      "step": 1013
    },
    {
      "epoch": 0.2967515364354697,
      "grad_norm": 0.00825866125524044,
      "learning_rate": 4.258121158911326e-05,
      "loss": 0.0002,
      "step": 1014
    },
    {
      "epoch": 0.29704419081065264,
      "grad_norm": 0.011433348059654236,
      "learning_rate": 4.257389522973369e-05,
      "loss": 0.0002,
      "step": 1015
    },
    {
      "epoch": 0.29733684518583553,
      "grad_norm": 0.005431619007140398,
      "learning_rate": 4.2566578870354115e-05,
      "loss": 0.0001,
      "step": 1016
    },
    {
      "epoch": 0.2976294995610184,
      "grad_norm": 0.006892085541039705,
      "learning_rate": 4.2559262510974543e-05,
      "loss": 0.0001,
      "step": 1017
    },
    {
      "epoch": 0.29792215393620136,
      "grad_norm": 0.016305092722177505,
      "learning_rate": 4.255194615159497e-05,
      "loss": 0.0002,
      "step": 1018
    },
    {
      "epoch": 0.29821480831138425,
      "grad_norm": 0.015026643872261047,
      "learning_rate": 4.254462979221539e-05,
      "loss": 0.0002,
      "step": 1019
    },
    {
      "epoch": 0.29850746268656714,
      "grad_norm": 0.025266500189900398,
      "learning_rate": 4.253731343283582e-05,
      "loss": 0.0004,
      "step": 1020
    },
    {
      "epoch": 0.2988001170617501,
      "grad_norm": 0.014823542907834053,
      "learning_rate": 4.252999707345625e-05,
      "loss": 0.0003,
      "step": 1021
    },
    {
      "epoch": 0.29909277143693297,
      "grad_norm": 0.04029303789138794,
      "learning_rate": 4.2522680714076676e-05,
      "loss": 0.0004,
      "step": 1022
    },
    {
      "epoch": 0.2993854258121159,
      "grad_norm": 3.154346466064453,
      "learning_rate": 4.2515364354697104e-05,
      "loss": 0.1832,
      "step": 1023
    },
    {
      "epoch": 0.2996780801872988,
      "grad_norm": 0.040032967925071716,
      "learning_rate": 4.250804799531753e-05,
      "loss": 0.0006,
      "step": 1024
    },
    {
      "epoch": 0.2999707345624817,
      "grad_norm": 7.443057537078857,
      "learning_rate": 4.250073163593796e-05,
      "loss": 0.017,
      "step": 1025
    },
    {
      "epoch": 0.30026338893766463,
      "grad_norm": 0.3301943838596344,
      "learning_rate": 4.249341527655839e-05,
      "loss": 0.0031,
      "step": 1026
    },
    {
      "epoch": 0.3005560433128475,
      "grad_norm": 0.009938620030879974,
      "learning_rate": 4.248609891717881e-05,
      "loss": 0.0002,
      "step": 1027
    },
    {
      "epoch": 0.3008486976880304,
      "grad_norm": 0.0500522255897522,
      "learning_rate": 4.247878255779924e-05,
      "loss": 0.0007,
      "step": 1028
    },
    {
      "epoch": 0.30114135206321335,
      "grad_norm": 2.4152634143829346,
      "learning_rate": 4.2471466198419665e-05,
      "loss": 0.298,
      "step": 1029
    },
    {
      "epoch": 0.30143400643839624,
      "grad_norm": 1.904123067855835,
      "learning_rate": 4.246414983904009e-05,
      "loss": 0.0096,
      "step": 1030
    },
    {
      "epoch": 0.3017266608135792,
      "grad_norm": 0.4908471405506134,
      "learning_rate": 4.245683347966052e-05,
      "loss": 0.0031,
      "step": 1031
    },
    {
      "epoch": 0.30201931518876207,
      "grad_norm": 0.007599519100040197,
      "learning_rate": 4.244951712028095e-05,
      "loss": 0.0002,
      "step": 1032
    },
    {
      "epoch": 0.30231196956394496,
      "grad_norm": 8.015567779541016,
      "learning_rate": 4.244220076090138e-05,
      "loss": 0.0568,
      "step": 1033
    },
    {
      "epoch": 0.3026046239391279,
      "grad_norm": 0.13986808061599731,
      "learning_rate": 4.2434884401521805e-05,
      "loss": 0.0007,
      "step": 1034
    },
    {
      "epoch": 0.3028972783143108,
      "grad_norm": 0.00213990593329072,
      "learning_rate": 4.2427568042142226e-05,
      "loss": 0.0001,
      "step": 1035
    },
    {
      "epoch": 0.30318993268949374,
      "grad_norm": 0.9953876733779907,
      "learning_rate": 4.2420251682762654e-05,
      "loss": 0.0055,
      "step": 1036
    },
    {
      "epoch": 0.3034825870646766,
      "grad_norm": 0.012336323037743568,
      "learning_rate": 4.241293532338308e-05,
      "loss": 0.0002,
      "step": 1037
    },
    {
      "epoch": 0.3037752414398595,
      "grad_norm": 8.195555686950684,
      "learning_rate": 4.240561896400351e-05,
      "loss": 0.0533,
      "step": 1038
    },
    {
      "epoch": 0.30406789581504245,
      "grad_norm": 0.0049442751333117485,
      "learning_rate": 4.239830260462394e-05,
      "loss": 0.0001,
      "step": 1039
    },
    {
      "epoch": 0.30436055019022534,
      "grad_norm": 0.010625405237078667,
      "learning_rate": 4.2390986245244366e-05,
      "loss": 0.0003,
      "step": 1040
    },
    {
      "epoch": 0.30465320456540823,
      "grad_norm": 0.019566647708415985,
      "learning_rate": 4.2383669885864794e-05,
      "loss": 0.0003,
      "step": 1041
    },
    {
      "epoch": 0.3049458589405912,
      "grad_norm": 7.804142951965332,
      "learning_rate": 4.237635352648522e-05,
      "loss": 0.0354,
      "step": 1042
    },
    {
      "epoch": 0.30523851331577406,
      "grad_norm": 0.4666447639465332,
      "learning_rate": 4.236903716710565e-05,
      "loss": 0.0027,
      "step": 1043
    },
    {
      "epoch": 0.305531167690957,
      "grad_norm": 0.0783332884311676,
      "learning_rate": 4.236172080772608e-05,
      "loss": 0.0012,
      "step": 1044
    },
    {
      "epoch": 0.3058238220661399,
      "grad_norm": 9.876611709594727,
      "learning_rate": 4.2354404448346506e-05,
      "loss": 0.0625,
      "step": 1045
    },
    {
      "epoch": 0.3061164764413228,
      "grad_norm": 3.9588825702667236,
      "learning_rate": 4.234708808896693e-05,
      "loss": 0.0243,
      "step": 1046
    },
    {
      "epoch": 0.3064091308165057,
      "grad_norm": 0.024225052446126938,
      "learning_rate": 4.2339771729587355e-05,
      "loss": 0.0004,
      "step": 1047
    },
    {
      "epoch": 0.3067017851916886,
      "grad_norm": 0.15110638737678528,
      "learning_rate": 4.233245537020778e-05,
      "loss": 0.001,
      "step": 1048
    },
    {
      "epoch": 0.3069944395668715,
      "grad_norm": 0.0236615389585495,
      "learning_rate": 4.232513901082821e-05,
      "loss": 0.0004,
      "step": 1049
    },
    {
      "epoch": 0.30728709394205445,
      "grad_norm": 0.01637859269976616,
      "learning_rate": 4.231782265144864e-05,
      "loss": 0.0004,
      "step": 1050
    },
    {
      "epoch": 0.30757974831723733,
      "grad_norm": 0.015215440653264523,
      "learning_rate": 4.231050629206907e-05,
      "loss": 0.0003,
      "step": 1051
    },
    {
      "epoch": 0.3078724026924203,
      "grad_norm": 0.7368258833885193,
      "learning_rate": 4.2303189932689495e-05,
      "loss": 0.0038,
      "step": 1052
    },
    {
      "epoch": 0.30816505706760317,
      "grad_norm": 7.859990119934082,
      "learning_rate": 4.229587357330992e-05,
      "loss": 0.0521,
      "step": 1053
    },
    {
      "epoch": 0.30845771144278605,
      "grad_norm": 0.011727051809430122,
      "learning_rate": 4.228855721393035e-05,
      "loss": 0.0003,
      "step": 1054
    },
    {
      "epoch": 0.308750365817969,
      "grad_norm": 0.007139664608985186,
      "learning_rate": 4.228124085455078e-05,
      "loss": 0.0002,
      "step": 1055
    },
    {
      "epoch": 0.3090430201931519,
      "grad_norm": 1.1220675706863403,
      "learning_rate": 4.22739244951712e-05,
      "loss": 0.0031,
      "step": 1056
    },
    {
      "epoch": 0.3093356745683348,
      "grad_norm": 0.004913520999252796,
      "learning_rate": 4.226660813579163e-05,
      "loss": 0.0001,
      "step": 1057
    },
    {
      "epoch": 0.3096283289435177,
      "grad_norm": 0.047684524208307266,
      "learning_rate": 4.2259291776412056e-05,
      "loss": 0.0006,
      "step": 1058
    },
    {
      "epoch": 0.3099209833187006,
      "grad_norm": 0.07091566175222397,
      "learning_rate": 4.2251975417032483e-05,
      "loss": 0.0009,
      "step": 1059
    },
    {
      "epoch": 0.31021363769388355,
      "grad_norm": 5.7323899269104,
      "learning_rate": 4.224465905765291e-05,
      "loss": 0.014,
      "step": 1060
    },
    {
      "epoch": 0.31050629206906644,
      "grad_norm": 10.569522857666016,
      "learning_rate": 4.223734269827334e-05,
      "loss": 0.0419,
      "step": 1061
    },
    {
      "epoch": 0.3107989464442493,
      "grad_norm": 0.01675260253250599,
      "learning_rate": 4.223002633889377e-05,
      "loss": 0.0003,
      "step": 1062
    },
    {
      "epoch": 0.31109160081943227,
      "grad_norm": 0.010008047334849834,
      "learning_rate": 4.2222709979514195e-05,
      "loss": 0.0002,
      "step": 1063
    },
    {
      "epoch": 0.31138425519461516,
      "grad_norm": 0.27900147438049316,
      "learning_rate": 4.221539362013462e-05,
      "loss": 0.0026,
      "step": 1064
    },
    {
      "epoch": 0.31167690956979804,
      "grad_norm": 0.007643221411854029,
      "learning_rate": 4.220807726075505e-05,
      "loss": 0.0002,
      "step": 1065
    },
    {
      "epoch": 0.311969563944981,
      "grad_norm": 0.9489452242851257,
      "learning_rate": 4.220076090137548e-05,
      "loss": 0.0053,
      "step": 1066
    },
    {
      "epoch": 0.3122622183201639,
      "grad_norm": 4.815219402313232,
      "learning_rate": 4.21934445419959e-05,
      "loss": 0.1514,
      "step": 1067
    },
    {
      "epoch": 0.3125548726953468,
      "grad_norm": 0.020156513899564743,
      "learning_rate": 4.218612818261633e-05,
      "loss": 0.0003,
      "step": 1068
    },
    {
      "epoch": 0.3128475270705297,
      "grad_norm": 0.25733235478401184,
      "learning_rate": 4.2178811823236756e-05,
      "loss": 0.0007,
      "step": 1069
    },
    {
      "epoch": 0.3131401814457126,
      "grad_norm": 0.6491882801055908,
      "learning_rate": 4.2171495463857184e-05,
      "loss": 0.0016,
      "step": 1070
    },
    {
      "epoch": 0.31343283582089554,
      "grad_norm": 0.0020534771028906107,
      "learning_rate": 4.216417910447761e-05,
      "loss": 0.0001,
      "step": 1071
    },
    {
      "epoch": 0.3137254901960784,
      "grad_norm": 0.004543904215097427,
      "learning_rate": 4.215686274509804e-05,
      "loss": 0.0001,
      "step": 1072
    },
    {
      "epoch": 0.3140181445712613,
      "grad_norm": 0.006593961734324694,
      "learning_rate": 4.214954638571847e-05,
      "loss": 0.0002,
      "step": 1073
    },
    {
      "epoch": 0.31431079894644426,
      "grad_norm": 0.002780098468065262,
      "learning_rate": 4.2142230026338896e-05,
      "loss": 0.0001,
      "step": 1074
    },
    {
      "epoch": 0.31460345332162715,
      "grad_norm": 0.005017926450818777,
      "learning_rate": 4.2134913666959324e-05,
      "loss": 0.0001,
      "step": 1075
    },
    {
      "epoch": 0.3148961076968101,
      "grad_norm": 0.0033071120269596577,
      "learning_rate": 4.212759730757975e-05,
      "loss": 0.0001,
      "step": 1076
    },
    {
      "epoch": 0.315188762071993,
      "grad_norm": 0.007819382473826408,
      "learning_rate": 4.212028094820018e-05,
      "loss": 0.0001,
      "step": 1077
    },
    {
      "epoch": 0.31548141644717587,
      "grad_norm": 0.004965700674802065,
      "learning_rate": 4.21129645888206e-05,
      "loss": 0.0001,
      "step": 1078
    },
    {
      "epoch": 0.3157740708223588,
      "grad_norm": 0.07569081336259842,
      "learning_rate": 4.210564822944103e-05,
      "loss": 0.0005,
      "step": 1079
    },
    {
      "epoch": 0.3160667251975417,
      "grad_norm": 0.0030827720183879137,
      "learning_rate": 4.209833187006146e-05,
      "loss": 0.0001,
      "step": 1080
    },
    {
      "epoch": 0.3163593795727246,
      "grad_norm": 0.006564725656062365,
      "learning_rate": 4.2091015510681885e-05,
      "loss": 0.0002,
      "step": 1081
    },
    {
      "epoch": 0.31665203394790753,
      "grad_norm": 0.004230671562254429,
      "learning_rate": 4.208369915130231e-05,
      "loss": 0.0001,
      "step": 1082
    },
    {
      "epoch": 0.3169446883230904,
      "grad_norm": 0.12409224361181259,
      "learning_rate": 4.207638279192274e-05,
      "loss": 0.0007,
      "step": 1083
    },
    {
      "epoch": 0.31723734269827336,
      "grad_norm": 0.44127991795539856,
      "learning_rate": 4.206906643254317e-05,
      "loss": 0.0011,
      "step": 1084
    },
    {
      "epoch": 0.31752999707345625,
      "grad_norm": 0.0017722928896546364,
      "learning_rate": 4.20617500731636e-05,
      "loss": 0.0001,
      "step": 1085
    },
    {
      "epoch": 0.31782265144863914,
      "grad_norm": 0.0450950562953949,
      "learning_rate": 4.2054433713784025e-05,
      "loss": 0.0004,
      "step": 1086
    },
    {
      "epoch": 0.3181153058238221,
      "grad_norm": 0.00162849563639611,
      "learning_rate": 4.204711735440445e-05,
      "loss": 0.0,
      "step": 1087
    },
    {
      "epoch": 0.31840796019900497,
      "grad_norm": 0.003102143993601203,
      "learning_rate": 4.2039800995024874e-05,
      "loss": 0.0001,
      "step": 1088
    },
    {
      "epoch": 0.31870061457418786,
      "grad_norm": 11.183212280273438,
      "learning_rate": 4.20324846356453e-05,
      "loss": 0.0843,
      "step": 1089
    },
    {
      "epoch": 0.3189932689493708,
      "grad_norm": 6.125216960906982,
      "learning_rate": 4.202516827626573e-05,
      "loss": 0.2432,
      "step": 1090
    },
    {
      "epoch": 0.3192859233245537,
      "grad_norm": 15.360879898071289,
      "learning_rate": 4.201785191688616e-05,
      "loss": 0.0511,
      "step": 1091
    },
    {
      "epoch": 0.31957857769973663,
      "grad_norm": 0.09148026257753372,
      "learning_rate": 4.2010535557506585e-05,
      "loss": 0.0007,
      "step": 1092
    },
    {
      "epoch": 0.3198712320749195,
      "grad_norm": 0.059560466557741165,
      "learning_rate": 4.2003219198127013e-05,
      "loss": 0.0005,
      "step": 1093
    },
    {
      "epoch": 0.3201638864501024,
      "grad_norm": 0.15515701472759247,
      "learning_rate": 4.199590283874744e-05,
      "loss": 0.0008,
      "step": 1094
    },
    {
      "epoch": 0.32045654082528535,
      "grad_norm": 0.5345580577850342,
      "learning_rate": 4.198858647936787e-05,
      "loss": 0.0022,
      "step": 1095
    },
    {
      "epoch": 0.32074919520046824,
      "grad_norm": 9.372148513793945,
      "learning_rate": 4.19812701199883e-05,
      "loss": 0.1242,
      "step": 1096
    },
    {
      "epoch": 0.3210418495756512,
      "grad_norm": 0.04386669024825096,
      "learning_rate": 4.1973953760608725e-05,
      "loss": 0.0005,
      "step": 1097
    },
    {
      "epoch": 0.32133450395083407,
      "grad_norm": 0.02652975730597973,
      "learning_rate": 4.196663740122915e-05,
      "loss": 0.0002,
      "step": 1098
    },
    {
      "epoch": 0.32162715832601696,
      "grad_norm": 0.0688885748386383,
      "learning_rate": 4.1959321041849574e-05,
      "loss": 0.0004,
      "step": 1099
    },
    {
      "epoch": 0.3219198127011999,
      "grad_norm": 7.224321365356445,
      "learning_rate": 4.195200468247e-05,
      "loss": 0.0256,
      "step": 1100
    },
    {
      "epoch": 0.3222124670763828,
      "grad_norm": 0.005632548127323389,
      "learning_rate": 4.194468832309043e-05,
      "loss": 0.0001,
      "step": 1101
    },
    {
      "epoch": 0.3225051214515657,
      "grad_norm": 6.820813179016113,
      "learning_rate": 4.193737196371086e-05,
      "loss": 0.1765,
      "step": 1102
    },
    {
      "epoch": 0.3227977758267486,
      "grad_norm": 0.0025644320994615555,
      "learning_rate": 4.1930055604331286e-05,
      "loss": 0.0001,
      "step": 1103
    },
    {
      "epoch": 0.3230904302019315,
      "grad_norm": 0.015021145343780518,
      "learning_rate": 4.1922739244951714e-05,
      "loss": 0.0001,
      "step": 1104
    },
    {
      "epoch": 0.32338308457711445,
      "grad_norm": 4.52939510345459,
      "learning_rate": 4.191542288557214e-05,
      "loss": 0.0091,
      "step": 1105
    },
    {
      "epoch": 0.32367573895229734,
      "grad_norm": 4.507110118865967,
      "learning_rate": 4.190810652619257e-05,
      "loss": 0.0182,
      "step": 1106
    },
    {
      "epoch": 0.32396839332748023,
      "grad_norm": 0.005083255935460329,
      "learning_rate": 4.1900790166813e-05,
      "loss": 0.0001,
      "step": 1107
    },
    {
      "epoch": 0.3242610477026632,
      "grad_norm": 0.018605127930641174,
      "learning_rate": 4.1893473807433426e-05,
      "loss": 0.0003,
      "step": 1108
    },
    {
      "epoch": 0.32455370207784606,
      "grad_norm": 0.02068709395825863,
      "learning_rate": 4.188615744805385e-05,
      "loss": 0.0003,
      "step": 1109
    },
    {
      "epoch": 0.32484635645302895,
      "grad_norm": 0.020834336057305336,
      "learning_rate": 4.1878841088674275e-05,
      "loss": 0.0003,
      "step": 1110
    },
    {
      "epoch": 0.3251390108282119,
      "grad_norm": 0.25753891468048096,
      "learning_rate": 4.18715247292947e-05,
      "loss": 0.0014,
      "step": 1111
    },
    {
      "epoch": 0.3254316652033948,
      "grad_norm": 5.986315727233887,
      "learning_rate": 4.186420836991513e-05,
      "loss": 0.1455,
      "step": 1112
    },
    {
      "epoch": 0.3257243195785777,
      "grad_norm": 0.11490517854690552,
      "learning_rate": 4.185689201053556e-05,
      "loss": 0.001,
      "step": 1113
    },
    {
      "epoch": 0.3260169739537606,
      "grad_norm": 8.565180778503418,
      "learning_rate": 4.184957565115599e-05,
      "loss": 0.0544,
      "step": 1114
    },
    {
      "epoch": 0.3263096283289435,
      "grad_norm": 0.002345768269151449,
      "learning_rate": 4.1842259291776415e-05,
      "loss": 0.0,
      "step": 1115
    },
    {
      "epoch": 0.32660228270412645,
      "grad_norm": 8.543319702148438,
      "learning_rate": 4.183494293239684e-05,
      "loss": 0.0232,
      "step": 1116
    },
    {
      "epoch": 0.32689493707930933,
      "grad_norm": 4.645630359649658,
      "learning_rate": 4.182762657301727e-05,
      "loss": 0.0164,
      "step": 1117
    },
    {
      "epoch": 0.3271875914544922,
      "grad_norm": 0.0025222499389201403,
      "learning_rate": 4.18203102136377e-05,
      "loss": 0.0001,
      "step": 1118
    },
    {
      "epoch": 0.32748024582967517,
      "grad_norm": 0.007890268228948116,
      "learning_rate": 4.1812993854258127e-05,
      "loss": 0.0002,
      "step": 1119
    },
    {
      "epoch": 0.32777290020485805,
      "grad_norm": 0.03280539810657501,
      "learning_rate": 4.180567749487855e-05,
      "loss": 0.0005,
      "step": 1120
    },
    {
      "epoch": 0.328065554580041,
      "grad_norm": 1.9922877550125122,
      "learning_rate": 4.1798361135498976e-05,
      "loss": 0.0093,
      "step": 1121
    },
    {
      "epoch": 0.3283582089552239,
      "grad_norm": 5.538125514984131,
      "learning_rate": 4.1791044776119404e-05,
      "loss": 0.013,
      "step": 1122
    },
    {
      "epoch": 0.3286508633304068,
      "grad_norm": 3.149320602416992,
      "learning_rate": 4.178372841673983e-05,
      "loss": 0.0151,
      "step": 1123
    },
    {
      "epoch": 0.3289435177055897,
      "grad_norm": 0.19224414229393005,
      "learning_rate": 4.177641205736026e-05,
      "loss": 0.0013,
      "step": 1124
    },
    {
      "epoch": 0.3292361720807726,
      "grad_norm": 0.16442540287971497,
      "learning_rate": 4.176909569798069e-05,
      "loss": 0.0011,
      "step": 1125
    },
    {
      "epoch": 0.3295288264559555,
      "grad_norm": 0.014610587619245052,
      "learning_rate": 4.1761779338601115e-05,
      "loss": 0.0003,
      "step": 1126
    },
    {
      "epoch": 0.32982148083113844,
      "grad_norm": 5.707912445068359,
      "learning_rate": 4.1754462979221543e-05,
      "loss": 0.1486,
      "step": 1127
    },
    {
      "epoch": 0.3301141352063213,
      "grad_norm": 0.023597678169608116,
      "learning_rate": 4.174714661984197e-05,
      "loss": 0.0004,
      "step": 1128
    },
    {
      "epoch": 0.33040678958150427,
      "grad_norm": 0.05142216011881828,
      "learning_rate": 4.17398302604624e-05,
      "loss": 0.0008,
      "step": 1129
    },
    {
      "epoch": 0.33069944395668716,
      "grad_norm": 3.5134053230285645,
      "learning_rate": 4.173251390108283e-05,
      "loss": 0.0108,
      "step": 1130
    },
    {
      "epoch": 0.33099209833187004,
      "grad_norm": 0.02050555869936943,
      "learning_rate": 4.172519754170325e-05,
      "loss": 0.0003,
      "step": 1131
    },
    {
      "epoch": 0.331284752707053,
      "grad_norm": 0.07703938335180283,
      "learning_rate": 4.1717881182323676e-05,
      "loss": 0.0011,
      "step": 1132
    },
    {
      "epoch": 0.3315774070822359,
      "grad_norm": 2.8332552909851074,
      "learning_rate": 4.1710564822944104e-05,
      "loss": 0.006,
      "step": 1133
    },
    {
      "epoch": 0.33187006145741876,
      "grad_norm": 0.015761759132146835,
      "learning_rate": 4.170324846356453e-05,
      "loss": 0.0003,
      "step": 1134
    },
    {
      "epoch": 0.3321627158326017,
      "grad_norm": 3.626243829727173,
      "learning_rate": 4.169593210418496e-05,
      "loss": 0.018,
      "step": 1135
    },
    {
      "epoch": 0.3324553702077846,
      "grad_norm": 0.04060226306319237,
      "learning_rate": 4.168861574480539e-05,
      "loss": 0.0005,
      "step": 1136
    },
    {
      "epoch": 0.33274802458296754,
      "grad_norm": 0.09494542330503464,
      "learning_rate": 4.1681299385425816e-05,
      "loss": 0.0008,
      "step": 1137
    },
    {
      "epoch": 0.3330406789581504,
      "grad_norm": 0.5154232382774353,
      "learning_rate": 4.1673983026046244e-05,
      "loss": 0.0026,
      "step": 1138
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 0.011833082884550095,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.0002,
      "step": 1139
    },
    {
      "epoch": 0.33362598770851626,
      "grad_norm": 0.4022832214832306,
      "learning_rate": 4.16593503072871e-05,
      "loss": 0.0012,
      "step": 1140
    },
    {
      "epoch": 0.33391864208369915,
      "grad_norm": 2.553866386413574,
      "learning_rate": 4.165203394790752e-05,
      "loss": 0.2013,
      "step": 1141
    },
    {
      "epoch": 0.33421129645888203,
      "grad_norm": 0.004225094802677631,
      "learning_rate": 4.164471758852795e-05,
      "loss": 0.0001,
      "step": 1142
    },
    {
      "epoch": 0.334503950834065,
      "grad_norm": 0.03997182846069336,
      "learning_rate": 4.163740122914838e-05,
      "loss": 0.0004,
      "step": 1143
    },
    {
      "epoch": 0.33479660520924787,
      "grad_norm": 0.016258778050541878,
      "learning_rate": 4.1630084869768805e-05,
      "loss": 0.0002,
      "step": 1144
    },
    {
      "epoch": 0.3350892595844308,
      "grad_norm": 15.144865989685059,
      "learning_rate": 4.162276851038923e-05,
      "loss": 0.0768,
      "step": 1145
    },
    {
      "epoch": 0.3353819139596137,
      "grad_norm": 0.0572894886136055,
      "learning_rate": 4.161545215100966e-05,
      "loss": 0.0007,
      "step": 1146
    },
    {
      "epoch": 0.3356745683347966,
      "grad_norm": 0.36676284670829773,
      "learning_rate": 4.160813579163009e-05,
      "loss": 0.0037,
      "step": 1147
    },
    {
      "epoch": 0.33596722270997953,
      "grad_norm": 0.058127518743276596,
      "learning_rate": 4.160081943225052e-05,
      "loss": 0.0007,
      "step": 1148
    },
    {
      "epoch": 0.3362598770851624,
      "grad_norm": 5.779123783111572,
      "learning_rate": 4.1593503072870945e-05,
      "loss": 0.0924,
      "step": 1149
    },
    {
      "epoch": 0.3365525314603453,
      "grad_norm": 0.017173565924167633,
      "learning_rate": 4.158618671349137e-05,
      "loss": 0.0003,
      "step": 1150
    },
    {
      "epoch": 0.33684518583552825,
      "grad_norm": 0.0010117714991793036,
      "learning_rate": 4.15788703541118e-05,
      "loss": 0.0,
      "step": 1151
    },
    {
      "epoch": 0.33713784021071114,
      "grad_norm": 0.10073135793209076,
      "learning_rate": 4.157155399473222e-05,
      "loss": 0.0015,
      "step": 1152
    },
    {
      "epoch": 0.3374304945858941,
      "grad_norm": 0.706798791885376,
      "learning_rate": 4.156423763535265e-05,
      "loss": 0.0061,
      "step": 1153
    },
    {
      "epoch": 0.33772314896107697,
      "grad_norm": 7.5920257568359375,
      "learning_rate": 4.155692127597308e-05,
      "loss": 0.0663,
      "step": 1154
    },
    {
      "epoch": 0.33801580333625986,
      "grad_norm": 0.046216003596782684,
      "learning_rate": 4.1549604916593506e-05,
      "loss": 0.0004,
      "step": 1155
    },
    {
      "epoch": 0.3383084577114428,
      "grad_norm": 0.029077712446451187,
      "learning_rate": 4.1542288557213934e-05,
      "loss": 0.0006,
      "step": 1156
    },
    {
      "epoch": 0.3386011120866257,
      "grad_norm": 0.04728193208575249,
      "learning_rate": 4.153497219783436e-05,
      "loss": 0.0005,
      "step": 1157
    },
    {
      "epoch": 0.3388937664618086,
      "grad_norm": 0.018949152901768684,
      "learning_rate": 4.152765583845479e-05,
      "loss": 0.0003,
      "step": 1158
    },
    {
      "epoch": 0.3391864208369915,
      "grad_norm": 0.002709601540118456,
      "learning_rate": 4.152033947907522e-05,
      "loss": 0.0001,
      "step": 1159
    },
    {
      "epoch": 0.3394790752121744,
      "grad_norm": 0.409335196018219,
      "learning_rate": 4.1513023119695645e-05,
      "loss": 0.002,
      "step": 1160
    },
    {
      "epoch": 0.33977172958735735,
      "grad_norm": 0.00565820187330246,
      "learning_rate": 4.1505706760316073e-05,
      "loss": 0.0002,
      "step": 1161
    },
    {
      "epoch": 0.34006438396254024,
      "grad_norm": 0.003459826810285449,
      "learning_rate": 4.1498390400936495e-05,
      "loss": 0.0001,
      "step": 1162
    },
    {
      "epoch": 0.34035703833772313,
      "grad_norm": 0.011411946266889572,
      "learning_rate": 4.149107404155692e-05,
      "loss": 0.0002,
      "step": 1163
    },
    {
      "epoch": 0.34064969271290607,
      "grad_norm": 0.003876009490340948,
      "learning_rate": 4.148375768217735e-05,
      "loss": 0.0001,
      "step": 1164
    },
    {
      "epoch": 0.34094234708808896,
      "grad_norm": 1.1270101070404053,
      "learning_rate": 4.147644132279778e-05,
      "loss": 0.0037,
      "step": 1165
    },
    {
      "epoch": 0.3412350014632719,
      "grad_norm": 0.0018890424398705363,
      "learning_rate": 4.1469124963418206e-05,
      "loss": 0.0,
      "step": 1166
    },
    {
      "epoch": 0.3415276558384548,
      "grad_norm": 0.5412814617156982,
      "learning_rate": 4.1461808604038634e-05,
      "loss": 0.0013,
      "step": 1167
    },
    {
      "epoch": 0.3418203102136377,
      "grad_norm": 0.23688070476055145,
      "learning_rate": 4.145449224465906e-05,
      "loss": 0.001,
      "step": 1168
    },
    {
      "epoch": 0.3421129645888206,
      "grad_norm": 0.005186624825000763,
      "learning_rate": 4.144717588527949e-05,
      "loss": 0.0001,
      "step": 1169
    },
    {
      "epoch": 0.3424056189640035,
      "grad_norm": 0.0717591941356659,
      "learning_rate": 4.143985952589992e-05,
      "loss": 0.0003,
      "step": 1170
    },
    {
      "epoch": 0.3426982733391864,
      "grad_norm": 0.004381998907774687,
      "learning_rate": 4.1432543166520346e-05,
      "loss": 0.0001,
      "step": 1171
    },
    {
      "epoch": 0.34299092771436934,
      "grad_norm": 0.004648114088922739,
      "learning_rate": 4.1425226807140774e-05,
      "loss": 0.0001,
      "step": 1172
    },
    {
      "epoch": 0.34328358208955223,
      "grad_norm": 0.002697630086913705,
      "learning_rate": 4.1417910447761195e-05,
      "loss": 0.0001,
      "step": 1173
    },
    {
      "epoch": 0.3435762364647352,
      "grad_norm": 0.003834686242043972,
      "learning_rate": 4.141059408838162e-05,
      "loss": 0.0001,
      "step": 1174
    },
    {
      "epoch": 0.34386889083991806,
      "grad_norm": 0.18755541741847992,
      "learning_rate": 4.140327772900205e-05,
      "loss": 0.0007,
      "step": 1175
    },
    {
      "epoch": 0.34416154521510095,
      "grad_norm": 0.0005057503585703671,
      "learning_rate": 4.139596136962248e-05,
      "loss": 0.0,
      "step": 1176
    },
    {
      "epoch": 0.3444541995902839,
      "grad_norm": 0.003325084690004587,
      "learning_rate": 4.138864501024291e-05,
      "loss": 0.0001,
      "step": 1177
    },
    {
      "epoch": 0.3447468539654668,
      "grad_norm": 0.0005210234085097909,
      "learning_rate": 4.1381328650863335e-05,
      "loss": 0.0,
      "step": 1178
    },
    {
      "epoch": 0.34503950834064967,
      "grad_norm": 0.004745765123516321,
      "learning_rate": 4.137401229148376e-05,
      "loss": 0.0001,
      "step": 1179
    },
    {
      "epoch": 0.3453321627158326,
      "grad_norm": 12.660859107971191,
      "learning_rate": 4.136669593210419e-05,
      "loss": 0.0821,
      "step": 1180
    },
    {
      "epoch": 0.3456248170910155,
      "grad_norm": 0.024795109406113625,
      "learning_rate": 4.135937957272462e-05,
      "loss": 0.0003,
      "step": 1181
    },
    {
      "epoch": 0.34591747146619845,
      "grad_norm": 0.0026485335547477007,
      "learning_rate": 4.135206321334505e-05,
      "loss": 0.0001,
      "step": 1182
    },
    {
      "epoch": 0.34621012584138133,
      "grad_norm": 0.0020074001513421535,
      "learning_rate": 4.1344746853965475e-05,
      "loss": 0.0,
      "step": 1183
    },
    {
      "epoch": 0.3465027802165642,
      "grad_norm": 0.0006435702671296895,
      "learning_rate": 4.1337430494585896e-05,
      "loss": 0.0,
      "step": 1184
    },
    {
      "epoch": 0.34679543459174716,
      "grad_norm": 0.0022121912334114313,
      "learning_rate": 4.1330114135206324e-05,
      "loss": 0.0001,
      "step": 1185
    },
    {
      "epoch": 0.34708808896693005,
      "grad_norm": 0.0025052560959011316,
      "learning_rate": 4.132279777582675e-05,
      "loss": 0.0001,
      "step": 1186
    },
    {
      "epoch": 0.34738074334211294,
      "grad_norm": 1.232609748840332,
      "learning_rate": 4.131548141644718e-05,
      "loss": 0.0041,
      "step": 1187
    },
    {
      "epoch": 0.3476733977172959,
      "grad_norm": 0.7070603966712952,
      "learning_rate": 4.130816505706761e-05,
      "loss": 0.0011,
      "step": 1188
    },
    {
      "epoch": 0.3479660520924788,
      "grad_norm": 0.16078346967697144,
      "learning_rate": 4.1300848697688036e-05,
      "loss": 0.0007,
      "step": 1189
    },
    {
      "epoch": 0.3482587064676617,
      "grad_norm": 0.0014520692639052868,
      "learning_rate": 4.1293532338308464e-05,
      "loss": 0.0,
      "step": 1190
    },
    {
      "epoch": 0.3485513608428446,
      "grad_norm": 0.0011567205656319857,
      "learning_rate": 4.128621597892889e-05,
      "loss": 0.0,
      "step": 1191
    },
    {
      "epoch": 0.3488440152180275,
      "grad_norm": 0.0019392389804124832,
      "learning_rate": 4.127889961954931e-05,
      "loss": 0.0001,
      "step": 1192
    },
    {
      "epoch": 0.34913666959321044,
      "grad_norm": 0.0016712337965145707,
      "learning_rate": 4.127158326016974e-05,
      "loss": 0.0,
      "step": 1193
    },
    {
      "epoch": 0.3494293239683933,
      "grad_norm": 0.21385757625102997,
      "learning_rate": 4.126426690079017e-05,
      "loss": 0.0005,
      "step": 1194
    },
    {
      "epoch": 0.3497219783435762,
      "grad_norm": 0.0075780716724693775,
      "learning_rate": 4.1256950541410597e-05,
      "loss": 0.0001,
      "step": 1195
    },
    {
      "epoch": 0.35001463271875916,
      "grad_norm": 0.001782455830834806,
      "learning_rate": 4.1249634182031025e-05,
      "loss": 0.0,
      "step": 1196
    },
    {
      "epoch": 0.35030728709394204,
      "grad_norm": 0.016709130257368088,
      "learning_rate": 4.124231782265145e-05,
      "loss": 0.0001,
      "step": 1197
    },
    {
      "epoch": 0.350599941469125,
      "grad_norm": 0.39221397042274475,
      "learning_rate": 4.123500146327188e-05,
      "loss": 0.001,
      "step": 1198
    },
    {
      "epoch": 0.3508925958443079,
      "grad_norm": 0.001340279122814536,
      "learning_rate": 4.122768510389231e-05,
      "loss": 0.0,
      "step": 1199
    },
    {
      "epoch": 0.35118525021949076,
      "grad_norm": 0.01328748557716608,
      "learning_rate": 4.122036874451273e-05,
      "loss": 0.0001,
      "step": 1200
    },
    {
      "epoch": 0.3514779045946737,
      "grad_norm": 0.001047693658620119,
      "learning_rate": 4.121305238513316e-05,
      "loss": 0.0,
      "step": 1201
    },
    {
      "epoch": 0.3517705589698566,
      "grad_norm": 0.0032731464598327875,
      "learning_rate": 4.1205736025753585e-05,
      "loss": 0.0001,
      "step": 1202
    },
    {
      "epoch": 0.3520632133450395,
      "grad_norm": 0.8166394829750061,
      "learning_rate": 4.1198419666374013e-05,
      "loss": 0.0019,
      "step": 1203
    },
    {
      "epoch": 0.3523558677202224,
      "grad_norm": 0.014093336649239063,
      "learning_rate": 4.119110330699444e-05,
      "loss": 0.0001,
      "step": 1204
    },
    {
      "epoch": 0.3526485220954053,
      "grad_norm": 21.849721908569336,
      "learning_rate": 4.118378694761487e-05,
      "loss": 0.0372,
      "step": 1205
    },
    {
      "epoch": 0.35294117647058826,
      "grad_norm": 0.0012545472709462047,
      "learning_rate": 4.11764705882353e-05,
      "loss": 0.0,
      "step": 1206
    },
    {
      "epoch": 0.35323383084577115,
      "grad_norm": 0.0010050591081380844,
      "learning_rate": 4.1169154228855725e-05,
      "loss": 0.0,
      "step": 1207
    },
    {
      "epoch": 0.35352648522095403,
      "grad_norm": 0.0011364357778802514,
      "learning_rate": 4.1161837869476146e-05,
      "loss": 0.0,
      "step": 1208
    },
    {
      "epoch": 0.353819139596137,
      "grad_norm": 0.2661976218223572,
      "learning_rate": 4.1154521510096574e-05,
      "loss": 0.0005,
      "step": 1209
    },
    {
      "epoch": 0.35411179397131987,
      "grad_norm": 0.0009252326563000679,
      "learning_rate": 4.1147205150717e-05,
      "loss": 0.0,
      "step": 1210
    },
    {
      "epoch": 0.35440444834650275,
      "grad_norm": 0.0011284584179520607,
      "learning_rate": 4.113988879133743e-05,
      "loss": 0.0,
      "step": 1211
    },
    {
      "epoch": 0.3546971027216857,
      "grad_norm": 0.010301393456757069,
      "learning_rate": 4.113257243195786e-05,
      "loss": 0.0,
      "step": 1212
    },
    {
      "epoch": 0.3549897570968686,
      "grad_norm": 1.3835265636444092,
      "learning_rate": 4.1125256072578286e-05,
      "loss": 0.0043,
      "step": 1213
    },
    {
      "epoch": 0.35528241147205153,
      "grad_norm": 0.0026471379678696394,
      "learning_rate": 4.1117939713198714e-05,
      "loss": 0.0001,
      "step": 1214
    },
    {
      "epoch": 0.3555750658472344,
      "grad_norm": 0.0007697929395362735,
      "learning_rate": 4.111062335381914e-05,
      "loss": 0.0,
      "step": 1215
    },
    {
      "epoch": 0.3558677202224173,
      "grad_norm": 0.00047303663450293243,
      "learning_rate": 4.110330699443956e-05,
      "loss": 0.0,
      "step": 1216
    },
    {
      "epoch": 0.35616037459760025,
      "grad_norm": 0.0036289000418037176,
      "learning_rate": 4.109599063505999e-05,
      "loss": 0.0,
      "step": 1217
    },
    {
      "epoch": 0.35645302897278314,
      "grad_norm": 0.0009147240780293941,
      "learning_rate": 4.108867427568042e-05,
      "loss": 0.0,
      "step": 1218
    },
    {
      "epoch": 0.356745683347966,
      "grad_norm": 0.0012819399125874043,
      "learning_rate": 4.108135791630085e-05,
      "loss": 0.0,
      "step": 1219
    },
    {
      "epoch": 0.35703833772314897,
      "grad_norm": 0.0010691086063161492,
      "learning_rate": 4.1074041556921275e-05,
      "loss": 0.0,
      "step": 1220
    },
    {
      "epoch": 0.35733099209833186,
      "grad_norm": 0.0008675382705405354,
      "learning_rate": 4.10667251975417e-05,
      "loss": 0.0,
      "step": 1221
    },
    {
      "epoch": 0.3576236464735148,
      "grad_norm": 10.915205955505371,
      "learning_rate": 4.105940883816213e-05,
      "loss": 0.4804,
      "step": 1222
    },
    {
      "epoch": 0.3579163008486977,
      "grad_norm": 0.0017219093861058354,
      "learning_rate": 4.105209247878256e-05,
      "loss": 0.0,
      "step": 1223
    },
    {
      "epoch": 0.3582089552238806,
      "grad_norm": 0.0019681653939187527,
      "learning_rate": 4.104477611940299e-05,
      "loss": 0.0,
      "step": 1224
    },
    {
      "epoch": 0.3585016095990635,
      "grad_norm": 0.0012229308485984802,
      "learning_rate": 4.1037459760023415e-05,
      "loss": 0.0,
      "step": 1225
    },
    {
      "epoch": 0.3587942639742464,
      "grad_norm": 0.0015450023347511888,
      "learning_rate": 4.1030143400643836e-05,
      "loss": 0.0,
      "step": 1226
    },
    {
      "epoch": 0.35908691834942935,
      "grad_norm": 0.008542041294276714,
      "learning_rate": 4.1022827041264264e-05,
      "loss": 0.0001,
      "step": 1227
    },
    {
      "epoch": 0.35937957272461224,
      "grad_norm": 0.007675641216337681,
      "learning_rate": 4.101551068188469e-05,
      "loss": 0.0001,
      "step": 1228
    },
    {
      "epoch": 0.3596722270997951,
      "grad_norm": 4.740835189819336,
      "learning_rate": 4.100819432250512e-05,
      "loss": 0.0509,
      "step": 1229
    },
    {
      "epoch": 0.35996488147497807,
      "grad_norm": 12.755969047546387,
      "learning_rate": 4.100087796312555e-05,
      "loss": 0.1278,
      "step": 1230
    },
    {
      "epoch": 0.36025753585016096,
      "grad_norm": 0.02200162783265114,
      "learning_rate": 4.0993561603745976e-05,
      "loss": 0.0001,
      "step": 1231
    },
    {
      "epoch": 0.36055019022534385,
      "grad_norm": 0.008278402499854565,
      "learning_rate": 4.0986245244366404e-05,
      "loss": 0.0001,
      "step": 1232
    },
    {
      "epoch": 0.3608428446005268,
      "grad_norm": 0.07329938560724258,
      "learning_rate": 4.097892888498683e-05,
      "loss": 0.0005,
      "step": 1233
    },
    {
      "epoch": 0.3611354989757097,
      "grad_norm": 0.0015238827327266335,
      "learning_rate": 4.097161252560726e-05,
      "loss": 0.0,
      "step": 1234
    },
    {
      "epoch": 0.3614281533508926,
      "grad_norm": 7.457364559173584,
      "learning_rate": 4.096429616622769e-05,
      "loss": 0.0106,
      "step": 1235
    },
    {
      "epoch": 0.3617208077260755,
      "grad_norm": 0.0005767598049715161,
      "learning_rate": 4.0956979806848115e-05,
      "loss": 0.0,
      "step": 1236
    },
    {
      "epoch": 0.3620134621012584,
      "grad_norm": 0.004051645752042532,
      "learning_rate": 4.094966344746854e-05,
      "loss": 0.0001,
      "step": 1237
    },
    {
      "epoch": 0.36230611647644134,
      "grad_norm": 0.0036045070737600327,
      "learning_rate": 4.0942347088088965e-05,
      "loss": 0.0001,
      "step": 1238
    },
    {
      "epoch": 0.36259877085162423,
      "grad_norm": 0.05607904866337776,
      "learning_rate": 4.093503072870939e-05,
      "loss": 0.0004,
      "step": 1239
    },
    {
      "epoch": 0.3628914252268071,
      "grad_norm": 0.010820225812494755,
      "learning_rate": 4.092771436932982e-05,
      "loss": 0.0002,
      "step": 1240
    },
    {
      "epoch": 0.36318407960199006,
      "grad_norm": 0.0014110167976468801,
      "learning_rate": 4.092039800995025e-05,
      "loss": 0.0,
      "step": 1241
    },
    {
      "epoch": 0.36347673397717295,
      "grad_norm": 0.06000782176852226,
      "learning_rate": 4.0913081650570676e-05,
      "loss": 0.0005,
      "step": 1242
    },
    {
      "epoch": 0.3637693883523559,
      "grad_norm": 0.1307305246591568,
      "learning_rate": 4.0905765291191104e-05,
      "loss": 0.0007,
      "step": 1243
    },
    {
      "epoch": 0.3640620427275388,
      "grad_norm": 0.0020625912584364414,
      "learning_rate": 4.089844893181153e-05,
      "loss": 0.0,
      "step": 1244
    },
    {
      "epoch": 0.36435469710272167,
      "grad_norm": 0.002705375896766782,
      "learning_rate": 4.089113257243196e-05,
      "loss": 0.0001,
      "step": 1245
    },
    {
      "epoch": 0.3646473514779046,
      "grad_norm": 0.43489834666252136,
      "learning_rate": 4.088381621305239e-05,
      "loss": 0.0007,
      "step": 1246
    },
    {
      "epoch": 0.3649400058530875,
      "grad_norm": 0.0008786004618741572,
      "learning_rate": 4.087649985367281e-05,
      "loss": 0.0,
      "step": 1247
    },
    {
      "epoch": 0.3652326602282704,
      "grad_norm": 0.9479494690895081,
      "learning_rate": 4.086918349429324e-05,
      "loss": 0.0029,
      "step": 1248
    },
    {
      "epoch": 0.36552531460345333,
      "grad_norm": 0.08482043445110321,
      "learning_rate": 4.0861867134913665e-05,
      "loss": 0.0002,
      "step": 1249
    },
    {
      "epoch": 0.3658179689786362,
      "grad_norm": 0.33638685941696167,
      "learning_rate": 4.085455077553409e-05,
      "loss": 0.0013,
      "step": 1250
    },
    {
      "epoch": 0.36611062335381916,
      "grad_norm": 0.004500244278460741,
      "learning_rate": 4.084723441615452e-05,
      "loss": 0.0001,
      "step": 1251
    },
    {
      "epoch": 0.36640327772900205,
      "grad_norm": 0.03262517228722572,
      "learning_rate": 4.083991805677495e-05,
      "loss": 0.0002,
      "step": 1252
    },
    {
      "epoch": 0.36669593210418494,
      "grad_norm": 0.0035471178125590086,
      "learning_rate": 4.083260169739538e-05,
      "loss": 0.0001,
      "step": 1253
    },
    {
      "epoch": 0.3669885864793679,
      "grad_norm": 0.010737705044448376,
      "learning_rate": 4.0825285338015805e-05,
      "loss": 0.0001,
      "step": 1254
    },
    {
      "epoch": 0.3672812408545508,
      "grad_norm": 0.0018867601174861193,
      "learning_rate": 4.081796897863623e-05,
      "loss": 0.0,
      "step": 1255
    },
    {
      "epoch": 0.36757389522973366,
      "grad_norm": 0.016890184953808784,
      "learning_rate": 4.081065261925666e-05,
      "loss": 0.0001,
      "step": 1256
    },
    {
      "epoch": 0.3678665496049166,
      "grad_norm": 9.655845642089844,
      "learning_rate": 4.080333625987709e-05,
      "loss": 0.0169,
      "step": 1257
    },
    {
      "epoch": 0.3681592039800995,
      "grad_norm": 14.044231414794922,
      "learning_rate": 4.079601990049751e-05,
      "loss": 0.1856,
      "step": 1258
    },
    {
      "epoch": 0.36845185835528244,
      "grad_norm": 13.366278648376465,
      "learning_rate": 4.078870354111794e-05,
      "loss": 0.0625,
      "step": 1259
    },
    {
      "epoch": 0.3687445127304653,
      "grad_norm": 0.0026375851593911648,
      "learning_rate": 4.0781387181738366e-05,
      "loss": 0.0001,
      "step": 1260
    },
    {
      "epoch": 0.3690371671056482,
      "grad_norm": 0.0007985808770172298,
      "learning_rate": 4.0774070822358794e-05,
      "loss": 0.0,
      "step": 1261
    },
    {
      "epoch": 0.36932982148083116,
      "grad_norm": 0.0016761808656156063,
      "learning_rate": 4.076675446297922e-05,
      "loss": 0.0,
      "step": 1262
    },
    {
      "epoch": 0.36962247585601404,
      "grad_norm": 0.001301329699344933,
      "learning_rate": 4.075943810359965e-05,
      "loss": 0.0,
      "step": 1263
    },
    {
      "epoch": 0.36991513023119693,
      "grad_norm": 0.004801755305379629,
      "learning_rate": 4.075212174422008e-05,
      "loss": 0.0001,
      "step": 1264
    },
    {
      "epoch": 0.3702077846063799,
      "grad_norm": 0.0013198426458984613,
      "learning_rate": 4.0744805384840506e-05,
      "loss": 0.0,
      "step": 1265
    },
    {
      "epoch": 0.37050043898156276,
      "grad_norm": 6.025171756744385,
      "learning_rate": 4.0737489025460934e-05,
      "loss": 0.2881,
      "step": 1266
    },
    {
      "epoch": 0.3707930933567457,
      "grad_norm": 0.0029023438692092896,
      "learning_rate": 4.073017266608136e-05,
      "loss": 0.0,
      "step": 1267
    },
    {
      "epoch": 0.3710857477319286,
      "grad_norm": 0.004703729413449764,
      "learning_rate": 4.072285630670179e-05,
      "loss": 0.0001,
      "step": 1268
    },
    {
      "epoch": 0.3713784021071115,
      "grad_norm": 0.001049618935212493,
      "learning_rate": 4.071553994732221e-05,
      "loss": 0.0,
      "step": 1269
    },
    {
      "epoch": 0.3716710564822944,
      "grad_norm": 0.0010271676583215594,
      "learning_rate": 4.070822358794264e-05,
      "loss": 0.0,
      "step": 1270
    },
    {
      "epoch": 0.3719637108574773,
      "grad_norm": 0.08657903224229813,
      "learning_rate": 4.070090722856307e-05,
      "loss": 0.0003,
      "step": 1271
    },
    {
      "epoch": 0.3722563652326602,
      "grad_norm": 0.002311570104211569,
      "learning_rate": 4.0693590869183495e-05,
      "loss": 0.0001,
      "step": 1272
    },
    {
      "epoch": 0.37254901960784315,
      "grad_norm": 0.0010429377434775233,
      "learning_rate": 4.068627450980392e-05,
      "loss": 0.0,
      "step": 1273
    },
    {
      "epoch": 0.37284167398302603,
      "grad_norm": 0.0025792322121560574,
      "learning_rate": 4.067895815042435e-05,
      "loss": 0.0001,
      "step": 1274
    },
    {
      "epoch": 0.373134328358209,
      "grad_norm": 0.0026215538382530212,
      "learning_rate": 4.067164179104478e-05,
      "loss": 0.0001,
      "step": 1275
    },
    {
      "epoch": 0.37342698273339187,
      "grad_norm": 3.9294819831848145,
      "learning_rate": 4.0664325431665206e-05,
      "loss": 0.0067,
      "step": 1276
    },
    {
      "epoch": 0.37371963710857475,
      "grad_norm": 0.010046660900115967,
      "learning_rate": 4.0657009072285634e-05,
      "loss": 0.0002,
      "step": 1277
    },
    {
      "epoch": 0.3740122914837577,
      "grad_norm": 0.002739732852205634,
      "learning_rate": 4.064969271290606e-05,
      "loss": 0.0001,
      "step": 1278
    },
    {
      "epoch": 0.3743049458589406,
      "grad_norm": 0.0019405756611377,
      "learning_rate": 4.0642376353526483e-05,
      "loss": 0.0,
      "step": 1279
    },
    {
      "epoch": 0.3745976002341235,
      "grad_norm": 0.0013102261582389474,
      "learning_rate": 4.063505999414691e-05,
      "loss": 0.0,
      "step": 1280
    },
    {
      "epoch": 0.3748902546093064,
      "grad_norm": 0.0016861463664099574,
      "learning_rate": 4.062774363476734e-05,
      "loss": 0.0,
      "step": 1281
    },
    {
      "epoch": 0.3751829089844893,
      "grad_norm": 0.018297750502824783,
      "learning_rate": 4.062042727538777e-05,
      "loss": 0.0002,
      "step": 1282
    },
    {
      "epoch": 0.37547556335967225,
      "grad_norm": 0.007014001719653606,
      "learning_rate": 4.0613110916008195e-05,
      "loss": 0.0001,
      "step": 1283
    },
    {
      "epoch": 0.37576821773485514,
      "grad_norm": 0.005624879617244005,
      "learning_rate": 4.060579455662862e-05,
      "loss": 0.0001,
      "step": 1284
    },
    {
      "epoch": 0.376060872110038,
      "grad_norm": 0.003993480000644922,
      "learning_rate": 4.059847819724905e-05,
      "loss": 0.0001,
      "step": 1285
    },
    {
      "epoch": 0.37635352648522097,
      "grad_norm": 0.03830829635262489,
      "learning_rate": 4.059116183786948e-05,
      "loss": 0.0002,
      "step": 1286
    },
    {
      "epoch": 0.37664618086040386,
      "grad_norm": 0.008096047677099705,
      "learning_rate": 4.058384547848991e-05,
      "loss": 0.0001,
      "step": 1287
    },
    {
      "epoch": 0.37693883523558674,
      "grad_norm": 0.0047937557101249695,
      "learning_rate": 4.0576529119110335e-05,
      "loss": 0.0001,
      "step": 1288
    },
    {
      "epoch": 0.3772314896107697,
      "grad_norm": 0.0018073159735649824,
      "learning_rate": 4.056921275973076e-05,
      "loss": 0.0001,
      "step": 1289
    },
    {
      "epoch": 0.3775241439859526,
      "grad_norm": 0.004695659503340721,
      "learning_rate": 4.0561896400351184e-05,
      "loss": 0.0001,
      "step": 1290
    },
    {
      "epoch": 0.3778167983611355,
      "grad_norm": 0.002083779312670231,
      "learning_rate": 4.055458004097161e-05,
      "loss": 0.0,
      "step": 1291
    },
    {
      "epoch": 0.3781094527363184,
      "grad_norm": 0.011576264165341854,
      "learning_rate": 4.054726368159204e-05,
      "loss": 0.0001,
      "step": 1292
    },
    {
      "epoch": 0.3784021071115013,
      "grad_norm": 0.002925613196566701,
      "learning_rate": 4.053994732221247e-05,
      "loss": 0.0001,
      "step": 1293
    },
    {
      "epoch": 0.37869476148668424,
      "grad_norm": 0.0024837800301611423,
      "learning_rate": 4.0532630962832896e-05,
      "loss": 0.0001,
      "step": 1294
    },
    {
      "epoch": 0.3789874158618671,
      "grad_norm": 0.0065367016941308975,
      "learning_rate": 4.0525314603453324e-05,
      "loss": 0.0001,
      "step": 1295
    },
    {
      "epoch": 0.37928007023705007,
      "grad_norm": 0.000926087552215904,
      "learning_rate": 4.051799824407375e-05,
      "loss": 0.0,
      "step": 1296
    },
    {
      "epoch": 0.37957272461223296,
      "grad_norm": 0.0016152521129697561,
      "learning_rate": 4.051068188469418e-05,
      "loss": 0.0,
      "step": 1297
    },
    {
      "epoch": 0.37986537898741585,
      "grad_norm": 0.015473726205527782,
      "learning_rate": 4.050336552531461e-05,
      "loss": 0.0002,
      "step": 1298
    },
    {
      "epoch": 0.3801580333625988,
      "grad_norm": 0.022625859826803207,
      "learning_rate": 4.0496049165935036e-05,
      "loss": 0.0002,
      "step": 1299
    },
    {
      "epoch": 0.3804506877377817,
      "grad_norm": 0.0019020326435565948,
      "learning_rate": 4.048873280655546e-05,
      "loss": 0.0,
      "step": 1300
    },
    {
      "epoch": 0.38074334211296457,
      "grad_norm": 0.0017117613460868597,
      "learning_rate": 4.0481416447175885e-05,
      "loss": 0.0,
      "step": 1301
    },
    {
      "epoch": 0.3810359964881475,
      "grad_norm": 0.002542115282267332,
      "learning_rate": 4.047410008779631e-05,
      "loss": 0.0001,
      "step": 1302
    },
    {
      "epoch": 0.3813286508633304,
      "grad_norm": 19.405616760253906,
      "learning_rate": 4.046678372841674e-05,
      "loss": 0.0528,
      "step": 1303
    },
    {
      "epoch": 0.38162130523851334,
      "grad_norm": 0.4098086655139923,
      "learning_rate": 4.045946736903717e-05,
      "loss": 0.001,
      "step": 1304
    },
    {
      "epoch": 0.38191395961369623,
      "grad_norm": 0.009942581877112389,
      "learning_rate": 4.0452151009657597e-05,
      "loss": 0.0001,
      "step": 1305
    },
    {
      "epoch": 0.3822066139888791,
      "grad_norm": 3.136842727661133,
      "learning_rate": 4.0444834650278025e-05,
      "loss": 0.2425,
      "step": 1306
    },
    {
      "epoch": 0.38249926836406206,
      "grad_norm": 0.002530040917918086,
      "learning_rate": 4.043751829089845e-05,
      "loss": 0.0001,
      "step": 1307
    },
    {
      "epoch": 0.38279192273924495,
      "grad_norm": 1.0293583869934082,
      "learning_rate": 4.043020193151888e-05,
      "loss": 0.003,
      "step": 1308
    },
    {
      "epoch": 0.38308457711442784,
      "grad_norm": 0.005790709052234888,
      "learning_rate": 4.042288557213931e-05,
      "loss": 0.0001,
      "step": 1309
    },
    {
      "epoch": 0.3833772314896108,
      "grad_norm": 0.008284694515168667,
      "learning_rate": 4.0415569212759736e-05,
      "loss": 0.0001,
      "step": 1310
    },
    {
      "epoch": 0.38366988586479367,
      "grad_norm": 0.0034184777177870274,
      "learning_rate": 4.040825285338016e-05,
      "loss": 0.0001,
      "step": 1311
    },
    {
      "epoch": 0.3839625402399766,
      "grad_norm": 1.2440024614334106,
      "learning_rate": 4.0400936494000585e-05,
      "loss": 0.0069,
      "step": 1312
    },
    {
      "epoch": 0.3842551946151595,
      "grad_norm": 0.01935744099318981,
      "learning_rate": 4.0393620134621013e-05,
      "loss": 0.0003,
      "step": 1313
    },
    {
      "epoch": 0.3845478489903424,
      "grad_norm": 0.007463513873517513,
      "learning_rate": 4.038630377524144e-05,
      "loss": 0.0002,
      "step": 1314
    },
    {
      "epoch": 0.38484050336552533,
      "grad_norm": 0.05692553520202637,
      "learning_rate": 4.037898741586187e-05,
      "loss": 0.0008,
      "step": 1315
    },
    {
      "epoch": 0.3851331577407082,
      "grad_norm": 4.072761058807373,
      "learning_rate": 4.03716710564823e-05,
      "loss": 0.2337,
      "step": 1316
    },
    {
      "epoch": 0.3854258121158911,
      "grad_norm": 7.714447975158691,
      "learning_rate": 4.0364354697102725e-05,
      "loss": 0.1148,
      "step": 1317
    },
    {
      "epoch": 0.38571846649107405,
      "grad_norm": 0.3765358328819275,
      "learning_rate": 4.035703833772315e-05,
      "loss": 0.0031,
      "step": 1318
    },
    {
      "epoch": 0.38601112086625694,
      "grad_norm": 13.320783615112305,
      "learning_rate": 4.034972197834358e-05,
      "loss": 0.1663,
      "step": 1319
    },
    {
      "epoch": 0.3863037752414399,
      "grad_norm": 0.00583779439330101,
      "learning_rate": 4.034240561896401e-05,
      "loss": 0.0002,
      "step": 1320
    },
    {
      "epoch": 0.38659642961662277,
      "grad_norm": 0.005912384018301964,
      "learning_rate": 4.033508925958444e-05,
      "loss": 0.0002,
      "step": 1321
    },
    {
      "epoch": 0.38688908399180566,
      "grad_norm": 5.1954522132873535,
      "learning_rate": 4.032777290020486e-05,
      "loss": 0.1351,
      "step": 1322
    },
    {
      "epoch": 0.3871817383669886,
      "grad_norm": 10.917094230651855,
      "learning_rate": 4.0320456540825286e-05,
      "loss": 0.237,
      "step": 1323
    },
    {
      "epoch": 0.3874743927421715,
      "grad_norm": 0.014544911682605743,
      "learning_rate": 4.0313140181445714e-05,
      "loss": 0.0004,
      "step": 1324
    },
    {
      "epoch": 0.3877670471173544,
      "grad_norm": 3.4436397552490234,
      "learning_rate": 4.030582382206614e-05,
      "loss": 0.0618,
      "step": 1325
    },
    {
      "epoch": 0.3880597014925373,
      "grad_norm": 2.7318570613861084,
      "learning_rate": 4.029850746268657e-05,
      "loss": 0.1711,
      "step": 1326
    },
    {
      "epoch": 0.3883523558677202,
      "grad_norm": 1.5790855884552002,
      "learning_rate": 4.0291191103307e-05,
      "loss": 0.0179,
      "step": 1327
    },
    {
      "epoch": 0.38864501024290316,
      "grad_norm": 3.171313762664795,
      "learning_rate": 4.0283874743927426e-05,
      "loss": 0.1463,
      "step": 1328
    },
    {
      "epoch": 0.38893766461808604,
      "grad_norm": 0.06968532502651215,
      "learning_rate": 4.0276558384547854e-05,
      "loss": 0.0014,
      "step": 1329
    },
    {
      "epoch": 0.38923031899326893,
      "grad_norm": 5.104183197021484,
      "learning_rate": 4.026924202516828e-05,
      "loss": 0.0801,
      "step": 1330
    },
    {
      "epoch": 0.3895229733684519,
      "grad_norm": 1.3833603858947754,
      "learning_rate": 4.026192566578871e-05,
      "loss": 0.0159,
      "step": 1331
    },
    {
      "epoch": 0.38981562774363476,
      "grad_norm": 4.297267913818359,
      "learning_rate": 4.025460930640913e-05,
      "loss": 0.0527,
      "step": 1332
    },
    {
      "epoch": 0.39010828211881765,
      "grad_norm": 0.034918636083602905,
      "learning_rate": 4.024729294702956e-05,
      "loss": 0.0008,
      "step": 1333
    },
    {
      "epoch": 0.3904009364940006,
      "grad_norm": 0.22305458784103394,
      "learning_rate": 4.023997658764999e-05,
      "loss": 0.0036,
      "step": 1334
    },
    {
      "epoch": 0.3906935908691835,
      "grad_norm": 0.03878597170114517,
      "learning_rate": 4.0232660228270415e-05,
      "loss": 0.0008,
      "step": 1335
    },
    {
      "epoch": 0.3909862452443664,
      "grad_norm": 0.02626451477408409,
      "learning_rate": 4.022534386889084e-05,
      "loss": 0.0007,
      "step": 1336
    },
    {
      "epoch": 0.3912788996195493,
      "grad_norm": 0.02288234233856201,
      "learning_rate": 4.021802750951127e-05,
      "loss": 0.0004,
      "step": 1337
    },
    {
      "epoch": 0.3915715539947322,
      "grad_norm": 0.026334531605243683,
      "learning_rate": 4.02107111501317e-05,
      "loss": 0.0006,
      "step": 1338
    },
    {
      "epoch": 0.39186420836991515,
      "grad_norm": 0.027551332488656044,
      "learning_rate": 4.0203394790752127e-05,
      "loss": 0.0004,
      "step": 1339
    },
    {
      "epoch": 0.39215686274509803,
      "grad_norm": 2.991457939147949,
      "learning_rate": 4.0196078431372555e-05,
      "loss": 0.0121,
      "step": 1340
    },
    {
      "epoch": 0.3924495171202809,
      "grad_norm": 0.13049960136413574,
      "learning_rate": 4.018876207199298e-05,
      "loss": 0.0013,
      "step": 1341
    },
    {
      "epoch": 0.39274217149546387,
      "grad_norm": 0.016805050894618034,
      "learning_rate": 4.018144571261341e-05,
      "loss": 0.0004,
      "step": 1342
    },
    {
      "epoch": 0.39303482587064675,
      "grad_norm": 0.01396495383232832,
      "learning_rate": 4.017412935323383e-05,
      "loss": 0.0003,
      "step": 1343
    },
    {
      "epoch": 0.3933274802458297,
      "grad_norm": 0.23971877992153168,
      "learning_rate": 4.016681299385426e-05,
      "loss": 0.001,
      "step": 1344
    },
    {
      "epoch": 0.3936201346210126,
      "grad_norm": 0.04552144929766655,
      "learning_rate": 4.015949663447469e-05,
      "loss": 0.0005,
      "step": 1345
    },
    {
      "epoch": 0.3939127889961955,
      "grad_norm": 0.006005613133311272,
      "learning_rate": 4.0152180275095115e-05,
      "loss": 0.0001,
      "step": 1346
    },
    {
      "epoch": 0.3942054433713784,
      "grad_norm": 0.03240946680307388,
      "learning_rate": 4.0144863915715543e-05,
      "loss": 0.0003,
      "step": 1347
    },
    {
      "epoch": 0.3944980977465613,
      "grad_norm": 0.02361867018043995,
      "learning_rate": 4.013754755633597e-05,
      "loss": 0.0003,
      "step": 1348
    },
    {
      "epoch": 0.3947907521217442,
      "grad_norm": 1.2638202905654907,
      "learning_rate": 4.01302311969564e-05,
      "loss": 0.0057,
      "step": 1349
    },
    {
      "epoch": 0.39508340649692714,
      "grad_norm": 0.002966237021610141,
      "learning_rate": 4.012291483757683e-05,
      "loss": 0.0001,
      "step": 1350
    },
    {
      "epoch": 0.39537606087211,
      "grad_norm": 3.348245143890381,
      "learning_rate": 4.0115598478197255e-05,
      "loss": 0.2752,
      "step": 1351
    },
    {
      "epoch": 0.39566871524729297,
      "grad_norm": 0.0027857578825205564,
      "learning_rate": 4.010828211881768e-05,
      "loss": 0.0001,
      "step": 1352
    },
    {
      "epoch": 0.39596136962247586,
      "grad_norm": 0.013372791931033134,
      "learning_rate": 4.010096575943811e-05,
      "loss": 0.0002,
      "step": 1353
    },
    {
      "epoch": 0.39625402399765874,
      "grad_norm": 0.07573962211608887,
      "learning_rate": 4.009364940005853e-05,
      "loss": 0.0007,
      "step": 1354
    },
    {
      "epoch": 0.3965466783728417,
      "grad_norm": 0.09464661031961441,
      "learning_rate": 4.008633304067896e-05,
      "loss": 0.0011,
      "step": 1355
    },
    {
      "epoch": 0.3968393327480246,
      "grad_norm": 9.230062484741211,
      "learning_rate": 4.007901668129939e-05,
      "loss": 0.1205,
      "step": 1356
    },
    {
      "epoch": 0.3971319871232075,
      "grad_norm": 6.861654281616211,
      "learning_rate": 4.0071700321919816e-05,
      "loss": 0.0189,
      "step": 1357
    },
    {
      "epoch": 0.3974246414983904,
      "grad_norm": 2.2964892387390137,
      "learning_rate": 4.0064383962540244e-05,
      "loss": 0.0201,
      "step": 1358
    },
    {
      "epoch": 0.3977172958735733,
      "grad_norm": 0.1187041699886322,
      "learning_rate": 4.005706760316067e-05,
      "loss": 0.0009,
      "step": 1359
    },
    {
      "epoch": 0.39800995024875624,
      "grad_norm": 0.45186883211135864,
      "learning_rate": 4.00497512437811e-05,
      "loss": 0.0022,
      "step": 1360
    },
    {
      "epoch": 0.3983026046239391,
      "grad_norm": 0.014179177582263947,
      "learning_rate": 4.004243488440153e-05,
      "loss": 0.0004,
      "step": 1361
    },
    {
      "epoch": 0.398595258999122,
      "grad_norm": 0.0133640356361866,
      "learning_rate": 4.0035118525021956e-05,
      "loss": 0.0003,
      "step": 1362
    },
    {
      "epoch": 0.39888791337430496,
      "grad_norm": 0.19518370926380157,
      "learning_rate": 4.0027802165642384e-05,
      "loss": 0.001,
      "step": 1363
    },
    {
      "epoch": 0.39918056774948785,
      "grad_norm": 5.994090557098389,
      "learning_rate": 4.0020485806262805e-05,
      "loss": 0.0362,
      "step": 1364
    },
    {
      "epoch": 0.3994732221246708,
      "grad_norm": 12.498130798339844,
      "learning_rate": 4.001316944688323e-05,
      "loss": 0.096,
      "step": 1365
    },
    {
      "epoch": 0.3997658764998537,
      "grad_norm": 0.07240074127912521,
      "learning_rate": 4.000585308750366e-05,
      "loss": 0.0008,
      "step": 1366
    },
    {
      "epoch": 0.40005853087503657,
      "grad_norm": 0.06022394821047783,
      "learning_rate": 3.999853672812409e-05,
      "loss": 0.001,
      "step": 1367
    },
    {
      "epoch": 0.4003511852502195,
      "grad_norm": 0.023105381056666374,
      "learning_rate": 3.999122036874452e-05,
      "loss": 0.0005,
      "step": 1368
    },
    {
      "epoch": 0.4006438396254024,
      "grad_norm": 0.024272827431559563,
      "learning_rate": 3.9983904009364945e-05,
      "loss": 0.0005,
      "step": 1369
    },
    {
      "epoch": 0.4009364940005853,
      "grad_norm": 0.013460013084113598,
      "learning_rate": 3.997658764998537e-05,
      "loss": 0.0003,
      "step": 1370
    },
    {
      "epoch": 0.40122914837576823,
      "grad_norm": 0.6552921533584595,
      "learning_rate": 3.99692712906058e-05,
      "loss": 0.0035,
      "step": 1371
    },
    {
      "epoch": 0.4015218027509511,
      "grad_norm": 0.03980037197470665,
      "learning_rate": 3.996195493122623e-05,
      "loss": 0.0006,
      "step": 1372
    },
    {
      "epoch": 0.40181445712613406,
      "grad_norm": 0.04527165740728378,
      "learning_rate": 3.995463857184665e-05,
      "loss": 0.0009,
      "step": 1373
    },
    {
      "epoch": 0.40210711150131695,
      "grad_norm": 0.021924695000052452,
      "learning_rate": 3.994732221246708e-05,
      "loss": 0.0005,
      "step": 1374
    },
    {
      "epoch": 0.40239976587649984,
      "grad_norm": 0.02878180518746376,
      "learning_rate": 3.9940005853087506e-05,
      "loss": 0.0006,
      "step": 1375
    },
    {
      "epoch": 0.4026924202516828,
      "grad_norm": 0.04821598529815674,
      "learning_rate": 3.9932689493707934e-05,
      "loss": 0.0008,
      "step": 1376
    },
    {
      "epoch": 0.40298507462686567,
      "grad_norm": 1.2048909664154053,
      "learning_rate": 3.992537313432836e-05,
      "loss": 0.0047,
      "step": 1377
    },
    {
      "epoch": 0.40327772900204856,
      "grad_norm": 0.01729060895740986,
      "learning_rate": 3.991805677494879e-05,
      "loss": 0.0004,
      "step": 1378
    },
    {
      "epoch": 0.4035703833772315,
      "grad_norm": 3.4174089431762695,
      "learning_rate": 3.991074041556922e-05,
      "loss": 0.1988,
      "step": 1379
    },
    {
      "epoch": 0.4038630377524144,
      "grad_norm": 0.06411304324865341,
      "learning_rate": 3.9903424056189645e-05,
      "loss": 0.0007,
      "step": 1380
    },
    {
      "epoch": 0.40415569212759733,
      "grad_norm": 0.008149644359946251,
      "learning_rate": 3.989610769681007e-05,
      "loss": 0.0002,
      "step": 1381
    },
    {
      "epoch": 0.4044483465027802,
      "grad_norm": 0.07035718113183975,
      "learning_rate": 3.9888791337430495e-05,
      "loss": 0.0007,
      "step": 1382
    },
    {
      "epoch": 0.4047410008779631,
      "grad_norm": 0.004200743976980448,
      "learning_rate": 3.988147497805092e-05,
      "loss": 0.0001,
      "step": 1383
    },
    {
      "epoch": 0.40503365525314605,
      "grad_norm": 0.01093363855034113,
      "learning_rate": 3.987415861867135e-05,
      "loss": 0.0002,
      "step": 1384
    },
    {
      "epoch": 0.40532630962832894,
      "grad_norm": 0.005739153828471899,
      "learning_rate": 3.986684225929178e-05,
      "loss": 0.0001,
      "step": 1385
    },
    {
      "epoch": 0.40561896400351183,
      "grad_norm": 0.0033381276298314333,
      "learning_rate": 3.9859525899912206e-05,
      "loss": 0.0001,
      "step": 1386
    },
    {
      "epoch": 0.40591161837869477,
      "grad_norm": 0.06442870944738388,
      "learning_rate": 3.9852209540532634e-05,
      "loss": 0.0008,
      "step": 1387
    },
    {
      "epoch": 0.40620427275387766,
      "grad_norm": 0.014269592240452766,
      "learning_rate": 3.984489318115306e-05,
      "loss": 0.0002,
      "step": 1388
    },
    {
      "epoch": 0.4064969271290606,
      "grad_norm": 12.036171913146973,
      "learning_rate": 3.9837576821773483e-05,
      "loss": 0.1214,
      "step": 1389
    },
    {
      "epoch": 0.4067895815042435,
      "grad_norm": 9.762030601501465,
      "learning_rate": 3.983026046239391e-05,
      "loss": 0.132,
      "step": 1390
    },
    {
      "epoch": 0.4070822358794264,
      "grad_norm": 0.011800894513726234,
      "learning_rate": 3.982294410301434e-05,
      "loss": 0.0003,
      "step": 1391
    },
    {
      "epoch": 0.4073748902546093,
      "grad_norm": 0.014297746121883392,
      "learning_rate": 3.981562774363477e-05,
      "loss": 0.0002,
      "step": 1392
    },
    {
      "epoch": 0.4076675446297922,
      "grad_norm": 0.01088868360966444,
      "learning_rate": 3.9808311384255195e-05,
      "loss": 0.0002,
      "step": 1393
    },
    {
      "epoch": 0.4079601990049751,
      "grad_norm": 0.012100731022655964,
      "learning_rate": 3.980099502487562e-05,
      "loss": 0.0003,
      "step": 1394
    },
    {
      "epoch": 0.40825285338015804,
      "grad_norm": 0.0045606642961502075,
      "learning_rate": 3.979367866549605e-05,
      "loss": 0.0001,
      "step": 1395
    },
    {
      "epoch": 0.40854550775534093,
      "grad_norm": 0.007512548007071018,
      "learning_rate": 3.978636230611648e-05,
      "loss": 0.0002,
      "step": 1396
    },
    {
      "epoch": 0.4088381621305239,
      "grad_norm": 8.761828422546387,
      "learning_rate": 3.977904594673691e-05,
      "loss": 0.1879,
      "step": 1397
    },
    {
      "epoch": 0.40913081650570676,
      "grad_norm": 0.007708055432885885,
      "learning_rate": 3.977172958735733e-05,
      "loss": 0.0002,
      "step": 1398
    },
    {
      "epoch": 0.40942347088088965,
      "grad_norm": 0.022862451151013374,
      "learning_rate": 3.9764413227977756e-05,
      "loss": 0.0004,
      "step": 1399
    },
    {
      "epoch": 0.4097161252560726,
      "grad_norm": 0.01622886210680008,
      "learning_rate": 3.9757096868598184e-05,
      "loss": 0.0002,
      "step": 1400
    },
    {
      "epoch": 0.4100087796312555,
      "grad_norm": 0.004277550149708986,
      "learning_rate": 3.974978050921861e-05,
      "loss": 0.0001,
      "step": 1401
    },
    {
      "epoch": 0.41030143400643837,
      "grad_norm": 0.6454548239707947,
      "learning_rate": 3.974246414983904e-05,
      "loss": 0.0028,
      "step": 1402
    },
    {
      "epoch": 0.4105940883816213,
      "grad_norm": 0.007324654143303633,
      "learning_rate": 3.973514779045947e-05,
      "loss": 0.0002,
      "step": 1403
    },
    {
      "epoch": 0.4108867427568042,
      "grad_norm": 0.06323648244142532,
      "learning_rate": 3.9727831431079896e-05,
      "loss": 0.0005,
      "step": 1404
    },
    {
      "epoch": 0.41117939713198715,
      "grad_norm": 0.005309247877448797,
      "learning_rate": 3.9720515071700324e-05,
      "loss": 0.0001,
      "step": 1405
    },
    {
      "epoch": 0.41147205150717003,
      "grad_norm": 0.08161300420761108,
      "learning_rate": 3.971319871232075e-05,
      "loss": 0.0007,
      "step": 1406
    },
    {
      "epoch": 0.4117647058823529,
      "grad_norm": 0.009725712239742279,
      "learning_rate": 3.970588235294117e-05,
      "loss": 0.0002,
      "step": 1407
    },
    {
      "epoch": 0.41205736025753587,
      "grad_norm": 0.0238348376005888,
      "learning_rate": 3.96985659935616e-05,
      "loss": 0.0003,
      "step": 1408
    },
    {
      "epoch": 0.41235001463271875,
      "grad_norm": 0.014538202434778214,
      "learning_rate": 3.969124963418203e-05,
      "loss": 0.0004,
      "step": 1409
    },
    {
      "epoch": 0.41264266900790164,
      "grad_norm": 0.01194294448941946,
      "learning_rate": 3.968393327480246e-05,
      "loss": 0.0003,
      "step": 1410
    },
    {
      "epoch": 0.4129353233830846,
      "grad_norm": 10.102177619934082,
      "learning_rate": 3.9676616915422885e-05,
      "loss": 0.0828,
      "step": 1411
    },
    {
      "epoch": 0.4132279777582675,
      "grad_norm": 0.0121079720556736,
      "learning_rate": 3.966930055604331e-05,
      "loss": 0.0002,
      "step": 1412
    },
    {
      "epoch": 0.4135206321334504,
      "grad_norm": 0.01376876700669527,
      "learning_rate": 3.966198419666374e-05,
      "loss": 0.0003,
      "step": 1413
    },
    {
      "epoch": 0.4138132865086333,
      "grad_norm": 0.013423971831798553,
      "learning_rate": 3.965466783728417e-05,
      "loss": 0.0003,
      "step": 1414
    },
    {
      "epoch": 0.4141059408838162,
      "grad_norm": 0.018480490893125534,
      "learning_rate": 3.9647351477904597e-05,
      "loss": 0.0002,
      "step": 1415
    },
    {
      "epoch": 0.41439859525899914,
      "grad_norm": 0.010010740719735622,
      "learning_rate": 3.9640035118525025e-05,
      "loss": 0.0002,
      "step": 1416
    },
    {
      "epoch": 0.414691249634182,
      "grad_norm": 0.04045334458351135,
      "learning_rate": 3.9632718759145446e-05,
      "loss": 0.0006,
      "step": 1417
    },
    {
      "epoch": 0.4149839040093649,
      "grad_norm": 0.015969226136803627,
      "learning_rate": 3.9625402399765874e-05,
      "loss": 0.0004,
      "step": 1418
    },
    {
      "epoch": 0.41527655838454786,
      "grad_norm": 0.012548488564789295,
      "learning_rate": 3.96180860403863e-05,
      "loss": 0.0003,
      "step": 1419
    },
    {
      "epoch": 0.41556921275973074,
      "grad_norm": 0.002381518017500639,
      "learning_rate": 3.961076968100673e-05,
      "loss": 0.0001,
      "step": 1420
    },
    {
      "epoch": 0.4158618671349137,
      "grad_norm": 4.0952887535095215,
      "learning_rate": 3.960345332162716e-05,
      "loss": 0.0229,
      "step": 1421
    },
    {
      "epoch": 0.4161545215100966,
      "grad_norm": 0.039171867072582245,
      "learning_rate": 3.9596136962247585e-05,
      "loss": 0.0006,
      "step": 1422
    },
    {
      "epoch": 0.41644717588527946,
      "grad_norm": 0.05395697057247162,
      "learning_rate": 3.9588820602868013e-05,
      "loss": 0.0005,
      "step": 1423
    },
    {
      "epoch": 0.4167398302604624,
      "grad_norm": 2.978508949279785,
      "learning_rate": 3.958150424348844e-05,
      "loss": 0.216,
      "step": 1424
    },
    {
      "epoch": 0.4170324846356453,
      "grad_norm": 0.11305846273899078,
      "learning_rate": 3.957418788410887e-05,
      "loss": 0.0012,
      "step": 1425
    },
    {
      "epoch": 0.41732513901082824,
      "grad_norm": 1.4474173784255981,
      "learning_rate": 3.95668715247293e-05,
      "loss": 0.0048,
      "step": 1426
    },
    {
      "epoch": 0.4176177933860111,
      "grad_norm": 0.029976138845086098,
      "learning_rate": 3.9559555165349725e-05,
      "loss": 0.0006,
      "step": 1427
    },
    {
      "epoch": 0.417910447761194,
      "grad_norm": 0.0030669139232486486,
      "learning_rate": 3.9552238805970146e-05,
      "loss": 0.0001,
      "step": 1428
    },
    {
      "epoch": 0.41820310213637696,
      "grad_norm": 0.02776286192238331,
      "learning_rate": 3.9544922446590574e-05,
      "loss": 0.0005,
      "step": 1429
    },
    {
      "epoch": 0.41849575651155985,
      "grad_norm": 0.009369317442178726,
      "learning_rate": 3.9537606087211e-05,
      "loss": 0.0003,
      "step": 1430
    },
    {
      "epoch": 0.41878841088674273,
      "grad_norm": 0.05225818604230881,
      "learning_rate": 3.953028972783143e-05,
      "loss": 0.0007,
      "step": 1431
    },
    {
      "epoch": 0.4190810652619257,
      "grad_norm": 0.013598539866507053,
      "learning_rate": 3.952297336845186e-05,
      "loss": 0.0004,
      "step": 1432
    },
    {
      "epoch": 0.41937371963710857,
      "grad_norm": 0.018940743058919907,
      "learning_rate": 3.9515657009072286e-05,
      "loss": 0.0003,
      "step": 1433
    },
    {
      "epoch": 0.4196663740122915,
      "grad_norm": 0.014823894016444683,
      "learning_rate": 3.9508340649692714e-05,
      "loss": 0.0004,
      "step": 1434
    },
    {
      "epoch": 0.4199590283874744,
      "grad_norm": 0.016674788668751717,
      "learning_rate": 3.950102429031314e-05,
      "loss": 0.0004,
      "step": 1435
    },
    {
      "epoch": 0.4202516827626573,
      "grad_norm": 0.02102663926780224,
      "learning_rate": 3.949370793093357e-05,
      "loss": 0.0005,
      "step": 1436
    },
    {
      "epoch": 0.42054433713784023,
      "grad_norm": 0.01617257483303547,
      "learning_rate": 3.9486391571554e-05,
      "loss": 0.0003,
      "step": 1437
    },
    {
      "epoch": 0.4208369915130231,
      "grad_norm": 18.397354125976562,
      "learning_rate": 3.947907521217442e-05,
      "loss": 0.0812,
      "step": 1438
    },
    {
      "epoch": 0.421129645888206,
      "grad_norm": 4.920436382293701,
      "learning_rate": 3.947175885279485e-05,
      "loss": 0.051,
      "step": 1439
    },
    {
      "epoch": 0.42142230026338895,
      "grad_norm": 0.019231706857681274,
      "learning_rate": 3.9464442493415275e-05,
      "loss": 0.0004,
      "step": 1440
    },
    {
      "epoch": 0.42171495463857184,
      "grad_norm": 0.009344886988401413,
      "learning_rate": 3.94571261340357e-05,
      "loss": 0.0002,
      "step": 1441
    },
    {
      "epoch": 0.4220076090137548,
      "grad_norm": 0.019241783767938614,
      "learning_rate": 3.944980977465613e-05,
      "loss": 0.0004,
      "step": 1442
    },
    {
      "epoch": 0.42230026338893767,
      "grad_norm": 0.11743147671222687,
      "learning_rate": 3.944249341527656e-05,
      "loss": 0.0009,
      "step": 1443
    },
    {
      "epoch": 0.42259291776412056,
      "grad_norm": 0.027256010100245476,
      "learning_rate": 3.943517705589699e-05,
      "loss": 0.0005,
      "step": 1444
    },
    {
      "epoch": 0.4228855721393035,
      "grad_norm": 0.005137627013027668,
      "learning_rate": 3.9427860696517415e-05,
      "loss": 0.0001,
      "step": 1445
    },
    {
      "epoch": 0.4231782265144864,
      "grad_norm": 4.843735694885254,
      "learning_rate": 3.942054433713784e-05,
      "loss": 0.1782,
      "step": 1446
    },
    {
      "epoch": 0.4234708808896693,
      "grad_norm": 5.0710649490356445,
      "learning_rate": 3.941322797775827e-05,
      "loss": 0.0176,
      "step": 1447
    },
    {
      "epoch": 0.4237635352648522,
      "grad_norm": 1.0121043920516968,
      "learning_rate": 3.94059116183787e-05,
      "loss": 0.0036,
      "step": 1448
    },
    {
      "epoch": 0.4240561896400351,
      "grad_norm": 0.01022071111947298,
      "learning_rate": 3.939859525899912e-05,
      "loss": 0.0002,
      "step": 1449
    },
    {
      "epoch": 0.42434884401521805,
      "grad_norm": 0.011404616758227348,
      "learning_rate": 3.939127889961955e-05,
      "loss": 0.0002,
      "step": 1450
    },
    {
      "epoch": 0.42464149839040094,
      "grad_norm": 8.245271682739258,
      "learning_rate": 3.9383962540239976e-05,
      "loss": 0.2487,
      "step": 1451
    },
    {
      "epoch": 0.42493415276558383,
      "grad_norm": 6.751049995422363,
      "learning_rate": 3.9376646180860404e-05,
      "loss": 0.0898,
      "step": 1452
    },
    {
      "epoch": 0.42522680714076677,
      "grad_norm": 0.009213696233928204,
      "learning_rate": 3.936932982148083e-05,
      "loss": 0.0003,
      "step": 1453
    },
    {
      "epoch": 0.42551946151594966,
      "grad_norm": 0.008370252326130867,
      "learning_rate": 3.936201346210126e-05,
      "loss": 0.0002,
      "step": 1454
    },
    {
      "epoch": 0.42581211589113255,
      "grad_norm": 0.01335230190306902,
      "learning_rate": 3.935469710272169e-05,
      "loss": 0.0003,
      "step": 1455
    },
    {
      "epoch": 0.4261047702663155,
      "grad_norm": 0.013338599354028702,
      "learning_rate": 3.9347380743342115e-05,
      "loss": 0.0003,
      "step": 1456
    },
    {
      "epoch": 0.4263974246414984,
      "grad_norm": 0.057257991284132004,
      "learning_rate": 3.9340064383962543e-05,
      "loss": 0.0005,
      "step": 1457
    },
    {
      "epoch": 0.4266900790166813,
      "grad_norm": 0.03446501865983009,
      "learning_rate": 3.933274802458297e-05,
      "loss": 0.0006,
      "step": 1458
    },
    {
      "epoch": 0.4269827333918642,
      "grad_norm": 4.571585178375244,
      "learning_rate": 3.93254316652034e-05,
      "loss": 0.0939,
      "step": 1459
    },
    {
      "epoch": 0.4272753877670471,
      "grad_norm": 0.02036476694047451,
      "learning_rate": 3.931811530582382e-05,
      "loss": 0.0005,
      "step": 1460
    },
    {
      "epoch": 0.42756804214223004,
      "grad_norm": 0.020145880058407784,
      "learning_rate": 3.931079894644425e-05,
      "loss": 0.0005,
      "step": 1461
    },
    {
      "epoch": 0.42786069651741293,
      "grad_norm": 0.022871676832437515,
      "learning_rate": 3.9303482587064676e-05,
      "loss": 0.0005,
      "step": 1462
    },
    {
      "epoch": 0.4281533508925958,
      "grad_norm": 0.07374252378940582,
      "learning_rate": 3.9296166227685104e-05,
      "loss": 0.0008,
      "step": 1463
    },
    {
      "epoch": 0.42844600526777876,
      "grad_norm": 0.02100226655602455,
      "learning_rate": 3.928884986830553e-05,
      "loss": 0.0004,
      "step": 1464
    },
    {
      "epoch": 0.42873865964296165,
      "grad_norm": 0.012682802975177765,
      "learning_rate": 3.928153350892596e-05,
      "loss": 0.0003,
      "step": 1465
    },
    {
      "epoch": 0.4290313140181446,
      "grad_norm": 0.01567976363003254,
      "learning_rate": 3.927421714954639e-05,
      "loss": 0.0005,
      "step": 1466
    },
    {
      "epoch": 0.4293239683933275,
      "grad_norm": 6.267890930175781,
      "learning_rate": 3.9266900790166816e-05,
      "loss": 0.0788,
      "step": 1467
    },
    {
      "epoch": 0.42961662276851037,
      "grad_norm": 0.006946507375687361,
      "learning_rate": 3.9259584430787244e-05,
      "loss": 0.0002,
      "step": 1468
    },
    {
      "epoch": 0.4299092771436933,
      "grad_norm": 0.010431395843625069,
      "learning_rate": 3.925226807140767e-05,
      "loss": 0.0002,
      "step": 1469
    },
    {
      "epoch": 0.4302019315188762,
      "grad_norm": 0.1004369780421257,
      "learning_rate": 3.924495171202809e-05,
      "loss": 0.001,
      "step": 1470
    },
    {
      "epoch": 0.4304945858940591,
      "grad_norm": 0.03698040917515755,
      "learning_rate": 3.923763535264852e-05,
      "loss": 0.0006,
      "step": 1471
    },
    {
      "epoch": 0.43078724026924203,
      "grad_norm": 0.18772642314434052,
      "learning_rate": 3.923031899326895e-05,
      "loss": 0.0018,
      "step": 1472
    },
    {
      "epoch": 0.4310798946444249,
      "grad_norm": 4.989668846130371,
      "learning_rate": 3.922300263388938e-05,
      "loss": 0.0318,
      "step": 1473
    },
    {
      "epoch": 0.43137254901960786,
      "grad_norm": 0.10625988245010376,
      "learning_rate": 3.9215686274509805e-05,
      "loss": 0.0013,
      "step": 1474
    },
    {
      "epoch": 0.43166520339479075,
      "grad_norm": 0.0064647323451936245,
      "learning_rate": 3.920836991513023e-05,
      "loss": 0.0001,
      "step": 1475
    },
    {
      "epoch": 0.43195785776997364,
      "grad_norm": 0.01839122176170349,
      "learning_rate": 3.920105355575066e-05,
      "loss": 0.0004,
      "step": 1476
    },
    {
      "epoch": 0.4322505121451566,
      "grad_norm": 0.023326152935624123,
      "learning_rate": 3.919373719637109e-05,
      "loss": 0.0005,
      "step": 1477
    },
    {
      "epoch": 0.4325431665203395,
      "grad_norm": 0.009359706193208694,
      "learning_rate": 3.918642083699152e-05,
      "loss": 0.0002,
      "step": 1478
    },
    {
      "epoch": 0.43283582089552236,
      "grad_norm": 0.009548685513436794,
      "learning_rate": 3.9179104477611945e-05,
      "loss": 0.0002,
      "step": 1479
    },
    {
      "epoch": 0.4331284752707053,
      "grad_norm": 0.4187501072883606,
      "learning_rate": 3.917178811823237e-05,
      "loss": 0.0029,
      "step": 1480
    },
    {
      "epoch": 0.4334211296458882,
      "grad_norm": 0.09940121322870255,
      "learning_rate": 3.9164471758852794e-05,
      "loss": 0.0008,
      "step": 1481
    },
    {
      "epoch": 0.43371378402107114,
      "grad_norm": 0.4981006383895874,
      "learning_rate": 3.915715539947322e-05,
      "loss": 0.0023,
      "step": 1482
    },
    {
      "epoch": 0.434006438396254,
      "grad_norm": 0.00698661245405674,
      "learning_rate": 3.914983904009365e-05,
      "loss": 0.0002,
      "step": 1483
    },
    {
      "epoch": 0.4342990927714369,
      "grad_norm": 0.016817551106214523,
      "learning_rate": 3.914252268071408e-05,
      "loss": 0.0004,
      "step": 1484
    },
    {
      "epoch": 0.43459174714661986,
      "grad_norm": 0.005778777413070202,
      "learning_rate": 3.9135206321334506e-05,
      "loss": 0.0001,
      "step": 1485
    },
    {
      "epoch": 0.43488440152180274,
      "grad_norm": 0.0036035231314599514,
      "learning_rate": 3.9127889961954934e-05,
      "loss": 0.0001,
      "step": 1486
    },
    {
      "epoch": 0.4351770558969857,
      "grad_norm": 0.007998858578503132,
      "learning_rate": 3.912057360257536e-05,
      "loss": 0.0001,
      "step": 1487
    },
    {
      "epoch": 0.4354697102721686,
      "grad_norm": 1.1437442302703857,
      "learning_rate": 3.911325724319579e-05,
      "loss": 0.0076,
      "step": 1488
    },
    {
      "epoch": 0.43576236464735146,
      "grad_norm": 0.0033962379675358534,
      "learning_rate": 3.910594088381622e-05,
      "loss": 0.0001,
      "step": 1489
    },
    {
      "epoch": 0.4360550190225344,
      "grad_norm": 0.010588807053864002,
      "learning_rate": 3.9098624524436645e-05,
      "loss": 0.0002,
      "step": 1490
    },
    {
      "epoch": 0.4363476733977173,
      "grad_norm": 0.006284533068537712,
      "learning_rate": 3.9091308165057067e-05,
      "loss": 0.0001,
      "step": 1491
    },
    {
      "epoch": 0.4366403277729002,
      "grad_norm": 0.007512817159295082,
      "learning_rate": 3.9083991805677495e-05,
      "loss": 0.0001,
      "step": 1492
    },
    {
      "epoch": 0.4369329821480831,
      "grad_norm": 0.5369182825088501,
      "learning_rate": 3.907667544629792e-05,
      "loss": 0.0025,
      "step": 1493
    },
    {
      "epoch": 0.437225636523266,
      "grad_norm": 0.00482457410544157,
      "learning_rate": 3.906935908691835e-05,
      "loss": 0.0001,
      "step": 1494
    },
    {
      "epoch": 0.43751829089844896,
      "grad_norm": 0.021206054836511612,
      "learning_rate": 3.906204272753878e-05,
      "loss": 0.0002,
      "step": 1495
    },
    {
      "epoch": 0.43781094527363185,
      "grad_norm": 0.007357397116720676,
      "learning_rate": 3.9054726368159206e-05,
      "loss": 0.0001,
      "step": 1496
    },
    {
      "epoch": 0.43810359964881473,
      "grad_norm": 0.0006888994830660522,
      "learning_rate": 3.9047410008779634e-05,
      "loss": 0.0,
      "step": 1497
    },
    {
      "epoch": 0.4383962540239977,
      "grad_norm": 0.00313826696947217,
      "learning_rate": 3.904009364940006e-05,
      "loss": 0.0001,
      "step": 1498
    },
    {
      "epoch": 0.43868890839918057,
      "grad_norm": 0.007373438682407141,
      "learning_rate": 3.903277729002049e-05,
      "loss": 0.0001,
      "step": 1499
    },
    {
      "epoch": 0.43898156277436345,
      "grad_norm": 0.06540460139513016,
      "learning_rate": 3.902546093064092e-05,
      "loss": 0.0004,
      "step": 1500
    },
    {
      "epoch": 0.4392742171495464,
      "grad_norm": 11.630769729614258,
      "learning_rate": 3.9018144571261346e-05,
      "loss": 0.1478,
      "step": 1501
    },
    {
      "epoch": 0.4395668715247293,
      "grad_norm": 1.5413963794708252,
      "learning_rate": 3.901082821188177e-05,
      "loss": 0.0044,
      "step": 1502
    },
    {
      "epoch": 0.43985952589991223,
      "grad_norm": 0.0013103276723995805,
      "learning_rate": 3.9003511852502195e-05,
      "loss": 0.0,
      "step": 1503
    },
    {
      "epoch": 0.4401521802750951,
      "grad_norm": 0.0021003279834985733,
      "learning_rate": 3.899619549312262e-05,
      "loss": 0.0001,
      "step": 1504
    },
    {
      "epoch": 0.440444834650278,
      "grad_norm": 4.0217976570129395,
      "learning_rate": 3.898887913374305e-05,
      "loss": 0.1454,
      "step": 1505
    },
    {
      "epoch": 0.44073748902546095,
      "grad_norm": 0.05071612820029259,
      "learning_rate": 3.898156277436348e-05,
      "loss": 0.0002,
      "step": 1506
    },
    {
      "epoch": 0.44103014340064384,
      "grad_norm": 0.0028761134017258883,
      "learning_rate": 3.897424641498391e-05,
      "loss": 0.0001,
      "step": 1507
    },
    {
      "epoch": 0.4413227977758267,
      "grad_norm": 1.7916882038116455,
      "learning_rate": 3.8966930055604335e-05,
      "loss": 0.0058,
      "step": 1508
    },
    {
      "epoch": 0.44161545215100967,
      "grad_norm": 0.0016702886205166578,
      "learning_rate": 3.895961369622476e-05,
      "loss": 0.0,
      "step": 1509
    },
    {
      "epoch": 0.44190810652619256,
      "grad_norm": 0.06896457821130753,
      "learning_rate": 3.895229733684519e-05,
      "loss": 0.0004,
      "step": 1510
    },
    {
      "epoch": 0.4422007609013755,
      "grad_norm": 0.013217308558523655,
      "learning_rate": 3.894498097746562e-05,
      "loss": 0.0002,
      "step": 1511
    },
    {
      "epoch": 0.4424934152765584,
      "grad_norm": 0.007142780348658562,
      "learning_rate": 3.893766461808605e-05,
      "loss": 0.0001,
      "step": 1512
    },
    {
      "epoch": 0.4427860696517413,
      "grad_norm": 0.10819875448942184,
      "learning_rate": 3.893034825870647e-05,
      "loss": 0.0005,
      "step": 1513
    },
    {
      "epoch": 0.4430787240269242,
      "grad_norm": 0.4063895344734192,
      "learning_rate": 3.8923031899326896e-05,
      "loss": 0.0009,
      "step": 1514
    },
    {
      "epoch": 0.4433713784021071,
      "grad_norm": 2.506284713745117,
      "learning_rate": 3.8915715539947324e-05,
      "loss": 0.249,
      "step": 1515
    },
    {
      "epoch": 0.44366403277729,
      "grad_norm": 0.004622430540621281,
      "learning_rate": 3.890839918056775e-05,
      "loss": 0.0001,
      "step": 1516
    },
    {
      "epoch": 0.44395668715247294,
      "grad_norm": 0.007036368362605572,
      "learning_rate": 3.890108282118818e-05,
      "loss": 0.0001,
      "step": 1517
    },
    {
      "epoch": 0.4442493415276558,
      "grad_norm": 0.0023173808585852385,
      "learning_rate": 3.889376646180861e-05,
      "loss": 0.0,
      "step": 1518
    },
    {
      "epoch": 0.44454199590283877,
      "grad_norm": 2.48722243309021,
      "learning_rate": 3.8886450102429036e-05,
      "loss": 0.1683,
      "step": 1519
    },
    {
      "epoch": 0.44483465027802166,
      "grad_norm": 0.0057946923188865185,
      "learning_rate": 3.8879133743049464e-05,
      "loss": 0.0001,
      "step": 1520
    },
    {
      "epoch": 0.44512730465320455,
      "grad_norm": 0.09324245899915695,
      "learning_rate": 3.887181738366989e-05,
      "loss": 0.0014,
      "step": 1521
    },
    {
      "epoch": 0.4454199590283875,
      "grad_norm": 0.3141811490058899,
      "learning_rate": 3.886450102429032e-05,
      "loss": 0.0023,
      "step": 1522
    },
    {
      "epoch": 0.4457126134035704,
      "grad_norm": 0.010908433236181736,
      "learning_rate": 3.885718466491074e-05,
      "loss": 0.0002,
      "step": 1523
    },
    {
      "epoch": 0.44600526777875327,
      "grad_norm": 0.04157865792512894,
      "learning_rate": 3.884986830553117e-05,
      "loss": 0.0007,
      "step": 1524
    },
    {
      "epoch": 0.4462979221539362,
      "grad_norm": 0.3245861232280731,
      "learning_rate": 3.8842551946151597e-05,
      "loss": 0.0029,
      "step": 1525
    },
    {
      "epoch": 0.4465905765291191,
      "grad_norm": 0.08890893310308456,
      "learning_rate": 3.8835235586772025e-05,
      "loss": 0.0012,
      "step": 1526
    },
    {
      "epoch": 0.44688323090430204,
      "grad_norm": 0.18287794291973114,
      "learning_rate": 3.882791922739245e-05,
      "loss": 0.0031,
      "step": 1527
    },
    {
      "epoch": 0.44717588527948493,
      "grad_norm": 0.14691399037837982,
      "learning_rate": 3.882060286801288e-05,
      "loss": 0.0013,
      "step": 1528
    },
    {
      "epoch": 0.4474685396546678,
      "grad_norm": 0.008837338536977768,
      "learning_rate": 3.881328650863331e-05,
      "loss": 0.0002,
      "step": 1529
    },
    {
      "epoch": 0.44776119402985076,
      "grad_norm": 0.15746049582958221,
      "learning_rate": 3.8805970149253736e-05,
      "loss": 0.0016,
      "step": 1530
    },
    {
      "epoch": 0.44805384840503365,
      "grad_norm": 1.1990667581558228,
      "learning_rate": 3.8798653789874164e-05,
      "loss": 0.0195,
      "step": 1531
    },
    {
      "epoch": 0.44834650278021654,
      "grad_norm": 10.3333740234375,
      "learning_rate": 3.879133743049459e-05,
      "loss": 0.1041,
      "step": 1532
    },
    {
      "epoch": 0.4486391571553995,
      "grad_norm": 0.15595147013664246,
      "learning_rate": 3.878402107111502e-05,
      "loss": 0.0015,
      "step": 1533
    },
    {
      "epoch": 0.44893181153058237,
      "grad_norm": 0.018397051841020584,
      "learning_rate": 3.877670471173544e-05,
      "loss": 0.0004,
      "step": 1534
    },
    {
      "epoch": 0.4492244659057653,
      "grad_norm": 0.00914598349481821,
      "learning_rate": 3.876938835235587e-05,
      "loss": 0.0002,
      "step": 1535
    },
    {
      "epoch": 0.4495171202809482,
      "grad_norm": 0.10302092880010605,
      "learning_rate": 3.87620719929763e-05,
      "loss": 0.0014,
      "step": 1536
    },
    {
      "epoch": 0.4498097746561311,
      "grad_norm": 0.062038298696279526,
      "learning_rate": 3.8754755633596725e-05,
      "loss": 0.0011,
      "step": 1537
    },
    {
      "epoch": 0.45010242903131403,
      "grad_norm": 0.1500018686056137,
      "learning_rate": 3.874743927421715e-05,
      "loss": 0.0015,
      "step": 1538
    },
    {
      "epoch": 0.4503950834064969,
      "grad_norm": 0.016570372506976128,
      "learning_rate": 3.874012291483758e-05,
      "loss": 0.0004,
      "step": 1539
    },
    {
      "epoch": 0.4506877377816798,
      "grad_norm": 0.01027642097324133,
      "learning_rate": 3.873280655545801e-05,
      "loss": 0.0002,
      "step": 1540
    },
    {
      "epoch": 0.45098039215686275,
      "grad_norm": 0.011809996329247952,
      "learning_rate": 3.872549019607844e-05,
      "loss": 0.0003,
      "step": 1541
    },
    {
      "epoch": 0.45127304653204564,
      "grad_norm": 0.05385143309831619,
      "learning_rate": 3.8718173836698865e-05,
      "loss": 0.0004,
      "step": 1542
    },
    {
      "epoch": 0.4515657009072286,
      "grad_norm": 0.19968895614147186,
      "learning_rate": 3.871085747731929e-05,
      "loss": 0.001,
      "step": 1543
    },
    {
      "epoch": 0.4518583552824115,
      "grad_norm": 0.004949803464114666,
      "learning_rate": 3.870354111793972e-05,
      "loss": 0.0001,
      "step": 1544
    },
    {
      "epoch": 0.45215100965759436,
      "grad_norm": 0.007569814566522837,
      "learning_rate": 3.869622475856014e-05,
      "loss": 0.0002,
      "step": 1545
    },
    {
      "epoch": 0.4524436640327773,
      "grad_norm": 0.006508437916636467,
      "learning_rate": 3.868890839918057e-05,
      "loss": 0.0002,
      "step": 1546
    },
    {
      "epoch": 0.4527363184079602,
      "grad_norm": 0.07787645608186722,
      "learning_rate": 3.8681592039801e-05,
      "loss": 0.0008,
      "step": 1547
    },
    {
      "epoch": 0.45302897278314314,
      "grad_norm": 0.01738792657852173,
      "learning_rate": 3.8674275680421426e-05,
      "loss": 0.0003,
      "step": 1548
    },
    {
      "epoch": 0.453321627158326,
      "grad_norm": 0.2992579936981201,
      "learning_rate": 3.8666959321041854e-05,
      "loss": 0.001,
      "step": 1549
    },
    {
      "epoch": 0.4536142815335089,
      "grad_norm": 0.061419274657964706,
      "learning_rate": 3.865964296166228e-05,
      "loss": 0.0004,
      "step": 1550
    },
    {
      "epoch": 0.45390693590869186,
      "grad_norm": 0.013273519463837147,
      "learning_rate": 3.865232660228271e-05,
      "loss": 0.0003,
      "step": 1551
    },
    {
      "epoch": 0.45419959028387474,
      "grad_norm": 0.004993957933038473,
      "learning_rate": 3.864501024290314e-05,
      "loss": 0.0001,
      "step": 1552
    },
    {
      "epoch": 0.45449224465905763,
      "grad_norm": 0.04157587140798569,
      "learning_rate": 3.8637693883523566e-05,
      "loss": 0.0007,
      "step": 1553
    },
    {
      "epoch": 0.4547848990342406,
      "grad_norm": 0.004822755232453346,
      "learning_rate": 3.8630377524143994e-05,
      "loss": 0.0001,
      "step": 1554
    },
    {
      "epoch": 0.45507755340942346,
      "grad_norm": 0.007024036720395088,
      "learning_rate": 3.8623061164764415e-05,
      "loss": 0.0001,
      "step": 1555
    },
    {
      "epoch": 0.4553702077846064,
      "grad_norm": 0.0037519552279263735,
      "learning_rate": 3.861574480538484e-05,
      "loss": 0.0001,
      "step": 1556
    },
    {
      "epoch": 0.4556628621597893,
      "grad_norm": 0.003447313094511628,
      "learning_rate": 3.860842844600527e-05,
      "loss": 0.0001,
      "step": 1557
    },
    {
      "epoch": 0.4559555165349722,
      "grad_norm": 0.002084544859826565,
      "learning_rate": 3.86011120866257e-05,
      "loss": 0.0001,
      "step": 1558
    },
    {
      "epoch": 0.4562481709101551,
      "grad_norm": 0.0013182968832552433,
      "learning_rate": 3.8593795727246127e-05,
      "loss": 0.0,
      "step": 1559
    },
    {
      "epoch": 0.456540825285338,
      "grad_norm": 0.0034747810568660498,
      "learning_rate": 3.8586479367866555e-05,
      "loss": 0.0001,
      "step": 1560
    },
    {
      "epoch": 0.4568334796605209,
      "grad_norm": 0.041806068271398544,
      "learning_rate": 3.857916300848698e-05,
      "loss": 0.0002,
      "step": 1561
    },
    {
      "epoch": 0.45712613403570385,
      "grad_norm": 0.0035612063948065042,
      "learning_rate": 3.857184664910741e-05,
      "loss": 0.0001,
      "step": 1562
    },
    {
      "epoch": 0.45741878841088673,
      "grad_norm": 0.008042696863412857,
      "learning_rate": 3.856453028972783e-05,
      "loss": 0.0001,
      "step": 1563
    },
    {
      "epoch": 0.4577114427860697,
      "grad_norm": 0.0036441178526729345,
      "learning_rate": 3.855721393034826e-05,
      "loss": 0.0001,
      "step": 1564
    },
    {
      "epoch": 0.45800409716125257,
      "grad_norm": 0.014042911119759083,
      "learning_rate": 3.854989757096869e-05,
      "loss": 0.0001,
      "step": 1565
    },
    {
      "epoch": 0.45829675153643545,
      "grad_norm": 0.0040075695142149925,
      "learning_rate": 3.8542581211589115e-05,
      "loss": 0.0001,
      "step": 1566
    },
    {
      "epoch": 0.4585894059116184,
      "grad_norm": 0.0015805475413799286,
      "learning_rate": 3.8535264852209543e-05,
      "loss": 0.0,
      "step": 1567
    },
    {
      "epoch": 0.4588820602868013,
      "grad_norm": 0.007362937089055777,
      "learning_rate": 3.852794849282997e-05,
      "loss": 0.0001,
      "step": 1568
    },
    {
      "epoch": 0.4591747146619842,
      "grad_norm": 0.0010071613360196352,
      "learning_rate": 3.85206321334504e-05,
      "loss": 0.0,
      "step": 1569
    },
    {
      "epoch": 0.4594673690371671,
      "grad_norm": 0.0006803566357120872,
      "learning_rate": 3.851331577407083e-05,
      "loss": 0.0,
      "step": 1570
    },
    {
      "epoch": 0.45976002341235,
      "grad_norm": 0.0031457000877708197,
      "learning_rate": 3.850599941469125e-05,
      "loss": 0.0001,
      "step": 1571
    },
    {
      "epoch": 0.46005267778753295,
      "grad_norm": 0.010785883292555809,
      "learning_rate": 3.8498683055311676e-05,
      "loss": 0.0001,
      "step": 1572
    },
    {
      "epoch": 0.46034533216271584,
      "grad_norm": 4.436830043792725,
      "learning_rate": 3.8491366695932104e-05,
      "loss": 0.1064,
      "step": 1573
    },
    {
      "epoch": 0.4606379865378987,
      "grad_norm": 0.012927822768688202,
      "learning_rate": 3.848405033655253e-05,
      "loss": 0.0001,
      "step": 1574
    },
    {
      "epoch": 0.46093064091308167,
      "grad_norm": 0.020288215950131416,
      "learning_rate": 3.847673397717296e-05,
      "loss": 0.0001,
      "step": 1575
    },
    {
      "epoch": 0.46122329528826456,
      "grad_norm": 0.006180945783853531,
      "learning_rate": 3.846941761779339e-05,
      "loss": 0.0001,
      "step": 1576
    },
    {
      "epoch": 0.46151594966344744,
      "grad_norm": 4.199718475341797,
      "learning_rate": 3.8462101258413816e-05,
      "loss": 0.0074,
      "step": 1577
    },
    {
      "epoch": 0.4618086040386304,
      "grad_norm": 0.7159136533737183,
      "learning_rate": 3.8454784899034244e-05,
      "loss": 0.0059,
      "step": 1578
    },
    {
      "epoch": 0.4621012584138133,
      "grad_norm": 0.0019133149180561304,
      "learning_rate": 3.8447468539654665e-05,
      "loss": 0.0,
      "step": 1579
    },
    {
      "epoch": 0.4623939127889962,
      "grad_norm": 0.0006168386898934841,
      "learning_rate": 3.844015218027509e-05,
      "loss": 0.0,
      "step": 1580
    },
    {
      "epoch": 0.4626865671641791,
      "grad_norm": 0.0026355122681707144,
      "learning_rate": 3.843283582089552e-05,
      "loss": 0.0,
      "step": 1581
    },
    {
      "epoch": 0.462979221539362,
      "grad_norm": 0.0010602109832689166,
      "learning_rate": 3.842551946151595e-05,
      "loss": 0.0,
      "step": 1582
    },
    {
      "epoch": 0.46327187591454494,
      "grad_norm": 0.0019133257446810603,
      "learning_rate": 3.841820310213638e-05,
      "loss": 0.0,
      "step": 1583
    },
    {
      "epoch": 0.4635645302897278,
      "grad_norm": 0.0017432968597859144,
      "learning_rate": 3.8410886742756805e-05,
      "loss": 0.0,
      "step": 1584
    },
    {
      "epoch": 0.4638571846649107,
      "grad_norm": 4.2040534019470215,
      "learning_rate": 3.840357038337723e-05,
      "loss": 0.0155,
      "step": 1585
    },
    {
      "epoch": 0.46414983904009366,
      "grad_norm": 13.846515655517578,
      "learning_rate": 3.839625402399766e-05,
      "loss": 0.0595,
      "step": 1586
    },
    {
      "epoch": 0.46444249341527655,
      "grad_norm": 8.511911392211914,
      "learning_rate": 3.838893766461808e-05,
      "loss": 0.1244,
      "step": 1587
    },
    {
      "epoch": 0.4647351477904595,
      "grad_norm": 0.0030939967837184668,
      "learning_rate": 3.838162130523851e-05,
      "loss": 0.0,
      "step": 1588
    },
    {
      "epoch": 0.4650278021656424,
      "grad_norm": 0.8021648526191711,
      "learning_rate": 3.837430494585894e-05,
      "loss": 0.0017,
      "step": 1589
    },
    {
      "epoch": 0.46532045654082527,
      "grad_norm": 0.002673780545592308,
      "learning_rate": 3.8366988586479366e-05,
      "loss": 0.0001,
      "step": 1590
    },
    {
      "epoch": 0.4656131109160082,
      "grad_norm": 8.91903018951416,
      "learning_rate": 3.8359672227099794e-05,
      "loss": 0.1671,
      "step": 1591
    },
    {
      "epoch": 0.4659057652911911,
      "grad_norm": 0.002431961242109537,
      "learning_rate": 3.835235586772022e-05,
      "loss": 0.0,
      "step": 1592
    },
    {
      "epoch": 0.466198419666374,
      "grad_norm": 7.502378463745117,
      "learning_rate": 3.834503950834065e-05,
      "loss": 0.0763,
      "step": 1593
    },
    {
      "epoch": 0.46649107404155693,
      "grad_norm": 0.017539622262120247,
      "learning_rate": 3.833772314896108e-05,
      "loss": 0.0002,
      "step": 1594
    },
    {
      "epoch": 0.4667837284167398,
      "grad_norm": 0.17085938155651093,
      "learning_rate": 3.8330406789581506e-05,
      "loss": 0.001,
      "step": 1595
    },
    {
      "epoch": 0.46707638279192276,
      "grad_norm": 1.729633092880249,
      "learning_rate": 3.8323090430201934e-05,
      "loss": 0.2641,
      "step": 1596
    },
    {
      "epoch": 0.46736903716710565,
      "grad_norm": 0.06899145990610123,
      "learning_rate": 3.831577407082236e-05,
      "loss": 0.0006,
      "step": 1597
    },
    {
      "epoch": 0.46766169154228854,
      "grad_norm": 0.004920002538710833,
      "learning_rate": 3.830845771144278e-05,
      "loss": 0.0001,
      "step": 1598
    },
    {
      "epoch": 0.4679543459174715,
      "grad_norm": 6.381435871124268,
      "learning_rate": 3.830114135206321e-05,
      "loss": 0.0941,
      "step": 1599
    },
    {
      "epoch": 0.46824700029265437,
      "grad_norm": 0.010226819664239883,
      "learning_rate": 3.829382499268364e-05,
      "loss": 0.0002,
      "step": 1600
    },
    {
      "epoch": 0.46853965466783726,
      "grad_norm": 7.104165077209473,
      "learning_rate": 3.8286508633304067e-05,
      "loss": 0.1188,
      "step": 1601
    },
    {
      "epoch": 0.4688323090430202,
      "grad_norm": 0.02988087385892868,
      "learning_rate": 3.8279192273924495e-05,
      "loss": 0.0006,
      "step": 1602
    },
    {
      "epoch": 0.4691249634182031,
      "grad_norm": 0.07559366524219513,
      "learning_rate": 3.827187591454492e-05,
      "loss": 0.0013,
      "step": 1603
    },
    {
      "epoch": 0.46941761779338603,
      "grad_norm": 0.11377298086881638,
      "learning_rate": 3.826455955516535e-05,
      "loss": 0.0023,
      "step": 1604
    },
    {
      "epoch": 0.4697102721685689,
      "grad_norm": 0.29434069991111755,
      "learning_rate": 3.825724319578578e-05,
      "loss": 0.0044,
      "step": 1605
    },
    {
      "epoch": 0.4700029265437518,
      "grad_norm": 0.18786948919296265,
      "learning_rate": 3.8249926836406206e-05,
      "loss": 0.004,
      "step": 1606
    },
    {
      "epoch": 0.47029558091893475,
      "grad_norm": 0.12316965311765671,
      "learning_rate": 3.8242610477026634e-05,
      "loss": 0.0024,
      "step": 1607
    },
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 0.03546321019530296,
      "learning_rate": 3.8235294117647055e-05,
      "loss": 0.0007,
      "step": 1608
    },
    {
      "epoch": 0.47088088966930053,
      "grad_norm": 0.08442742377519608,
      "learning_rate": 3.8227977758267483e-05,
      "loss": 0.0018,
      "step": 1609
    },
    {
      "epoch": 0.47117354404448347,
      "grad_norm": 0.07069230824708939,
      "learning_rate": 3.822066139888791e-05,
      "loss": 0.0006,
      "step": 1610
    },
    {
      "epoch": 0.47146619841966636,
      "grad_norm": 0.08098865300416946,
      "learning_rate": 3.821334503950834e-05,
      "loss": 0.0016,
      "step": 1611
    },
    {
      "epoch": 0.4717588527948493,
      "grad_norm": 0.1017969399690628,
      "learning_rate": 3.820602868012877e-05,
      "loss": 0.0017,
      "step": 1612
    },
    {
      "epoch": 0.4720515071700322,
      "grad_norm": 0.05172237381339073,
      "learning_rate": 3.8198712320749195e-05,
      "loss": 0.0011,
      "step": 1613
    },
    {
      "epoch": 0.4723441615452151,
      "grad_norm": 6.959278106689453,
      "learning_rate": 3.819139596136962e-05,
      "loss": 0.0355,
      "step": 1614
    },
    {
      "epoch": 0.472636815920398,
      "grad_norm": 0.010147632099688053,
      "learning_rate": 3.818407960199005e-05,
      "loss": 0.0003,
      "step": 1615
    },
    {
      "epoch": 0.4729294702955809,
      "grad_norm": 0.08733673393726349,
      "learning_rate": 3.817676324261048e-05,
      "loss": 0.0007,
      "step": 1616
    },
    {
      "epoch": 0.47322212467076386,
      "grad_norm": 0.015223219059407711,
      "learning_rate": 3.816944688323091e-05,
      "loss": 0.0004,
      "step": 1617
    },
    {
      "epoch": 0.47351477904594674,
      "grad_norm": 2.2570912837982178,
      "learning_rate": 3.8162130523851335e-05,
      "loss": 0.0089,
      "step": 1618
    },
    {
      "epoch": 0.47380743342112963,
      "grad_norm": 2.3515350818634033,
      "learning_rate": 3.8154814164471756e-05,
      "loss": 0.0106,
      "step": 1619
    },
    {
      "epoch": 0.4741000877963126,
      "grad_norm": 0.02242353744804859,
      "learning_rate": 3.8147497805092184e-05,
      "loss": 0.0004,
      "step": 1620
    },
    {
      "epoch": 0.47439274217149546,
      "grad_norm": 0.10555027425289154,
      "learning_rate": 3.814018144571261e-05,
      "loss": 0.0011,
      "step": 1621
    },
    {
      "epoch": 0.47468539654667835,
      "grad_norm": 4.0205769538879395,
      "learning_rate": 3.813286508633304e-05,
      "loss": 0.0072,
      "step": 1622
    },
    {
      "epoch": 0.4749780509218613,
      "grad_norm": 2.659857749938965,
      "learning_rate": 3.812554872695347e-05,
      "loss": 0.0142,
      "step": 1623
    },
    {
      "epoch": 0.4752707052970442,
      "grad_norm": 6.0880608558654785,
      "learning_rate": 3.8118232367573896e-05,
      "loss": 0.1141,
      "step": 1624
    },
    {
      "epoch": 0.4755633596722271,
      "grad_norm": 0.007533062249422073,
      "learning_rate": 3.8110916008194324e-05,
      "loss": 0.0002,
      "step": 1625
    },
    {
      "epoch": 0.47585601404741,
      "grad_norm": 0.09547273814678192,
      "learning_rate": 3.810359964881475e-05,
      "loss": 0.0005,
      "step": 1626
    },
    {
      "epoch": 0.4761486684225929,
      "grad_norm": 0.22707970440387726,
      "learning_rate": 3.809628328943518e-05,
      "loss": 0.0009,
      "step": 1627
    },
    {
      "epoch": 0.47644132279777585,
      "grad_norm": 0.0026254430413246155,
      "learning_rate": 3.808896693005561e-05,
      "loss": 0.0,
      "step": 1628
    },
    {
      "epoch": 0.47673397717295873,
      "grad_norm": 0.016741158440709114,
      "learning_rate": 3.808165057067603e-05,
      "loss": 0.0003,
      "step": 1629
    },
    {
      "epoch": 0.4770266315481416,
      "grad_norm": 10.494945526123047,
      "learning_rate": 3.807433421129646e-05,
      "loss": 0.0877,
      "step": 1630
    },
    {
      "epoch": 0.47731928592332457,
      "grad_norm": 13.728397369384766,
      "learning_rate": 3.8067017851916885e-05,
      "loss": 0.1001,
      "step": 1631
    },
    {
      "epoch": 0.47761194029850745,
      "grad_norm": 0.011857084929943085,
      "learning_rate": 3.805970149253731e-05,
      "loss": 0.0002,
      "step": 1632
    },
    {
      "epoch": 0.4779045946736904,
      "grad_norm": 8.238283157348633,
      "learning_rate": 3.805238513315774e-05,
      "loss": 0.0229,
      "step": 1633
    },
    {
      "epoch": 0.4781972490488733,
      "grad_norm": 7.15416955947876,
      "learning_rate": 3.804506877377817e-05,
      "loss": 0.0276,
      "step": 1634
    },
    {
      "epoch": 0.4784899034240562,
      "grad_norm": 0.009290005080401897,
      "learning_rate": 3.8037752414398597e-05,
      "loss": 0.0002,
      "step": 1635
    },
    {
      "epoch": 0.4787825577992391,
      "grad_norm": 0.009782211855053902,
      "learning_rate": 3.8030436055019025e-05,
      "loss": 0.0002,
      "step": 1636
    },
    {
      "epoch": 0.479075212174422,
      "grad_norm": 0.0036196867004036903,
      "learning_rate": 3.802311969563945e-05,
      "loss": 0.0001,
      "step": 1637
    },
    {
      "epoch": 0.4793678665496049,
      "grad_norm": 0.007514093536883593,
      "learning_rate": 3.801580333625988e-05,
      "loss": 0.0002,
      "step": 1638
    },
    {
      "epoch": 0.47966052092478784,
      "grad_norm": 0.07700416445732117,
      "learning_rate": 3.800848697688031e-05,
      "loss": 0.0008,
      "step": 1639
    },
    {
      "epoch": 0.4799531752999707,
      "grad_norm": 0.06622344255447388,
      "learning_rate": 3.800117061750073e-05,
      "loss": 0.0004,
      "step": 1640
    },
    {
      "epoch": 0.48024582967515367,
      "grad_norm": 0.03312865272164345,
      "learning_rate": 3.799385425812116e-05,
      "loss": 0.0004,
      "step": 1641
    },
    {
      "epoch": 0.48053848405033656,
      "grad_norm": 8.950428009033203,
      "learning_rate": 3.7986537898741585e-05,
      "loss": 0.0752,
      "step": 1642
    },
    {
      "epoch": 0.48083113842551944,
      "grad_norm": 0.01622932218015194,
      "learning_rate": 3.7979221539362013e-05,
      "loss": 0.0001,
      "step": 1643
    },
    {
      "epoch": 0.4811237928007024,
      "grad_norm": 5.980432510375977,
      "learning_rate": 3.797190517998244e-05,
      "loss": 0.0757,
      "step": 1644
    },
    {
      "epoch": 0.4814164471758853,
      "grad_norm": 5.4728217124938965,
      "learning_rate": 3.796458882060287e-05,
      "loss": 0.13,
      "step": 1645
    },
    {
      "epoch": 0.48170910155106816,
      "grad_norm": 0.004476544447243214,
      "learning_rate": 3.79572724612233e-05,
      "loss": 0.0001,
      "step": 1646
    },
    {
      "epoch": 0.4820017559262511,
      "grad_norm": 0.0021811951883137226,
      "learning_rate": 3.7949956101843725e-05,
      "loss": 0.0001,
      "step": 1647
    },
    {
      "epoch": 0.482294410301434,
      "grad_norm": 0.0010316645493730903,
      "learning_rate": 3.794263974246415e-05,
      "loss": 0.0,
      "step": 1648
    },
    {
      "epoch": 0.48258706467661694,
      "grad_norm": 0.25734391808509827,
      "learning_rate": 3.793532338308458e-05,
      "loss": 0.0017,
      "step": 1649
    },
    {
      "epoch": 0.4828797190517998,
      "grad_norm": 0.0003380229463800788,
      "learning_rate": 3.792800702370501e-05,
      "loss": 0.0,
      "step": 1650
    },
    {
      "epoch": 0.4831723734269827,
      "grad_norm": 18.4676513671875,
      "learning_rate": 3.792069066432543e-05,
      "loss": 0.0973,
      "step": 1651
    },
    {
      "epoch": 0.48346502780216566,
      "grad_norm": 0.015821581706404686,
      "learning_rate": 3.791337430494586e-05,
      "loss": 0.0003,
      "step": 1652
    },
    {
      "epoch": 0.48375768217734855,
      "grad_norm": 2.85701060295105,
      "learning_rate": 3.7906057945566286e-05,
      "loss": 0.0129,
      "step": 1653
    },
    {
      "epoch": 0.48405033655253143,
      "grad_norm": 0.15178117156028748,
      "learning_rate": 3.7898741586186714e-05,
      "loss": 0.0019,
      "step": 1654
    },
    {
      "epoch": 0.4843429909277144,
      "grad_norm": 0.014793816953897476,
      "learning_rate": 3.789142522680714e-05,
      "loss": 0.0003,
      "step": 1655
    },
    {
      "epoch": 0.48463564530289727,
      "grad_norm": 9.333024024963379,
      "learning_rate": 3.788410886742757e-05,
      "loss": 0.0552,
      "step": 1656
    },
    {
      "epoch": 0.4849282996780802,
      "grad_norm": 0.03246520087122917,
      "learning_rate": 3.7876792508048e-05,
      "loss": 0.0005,
      "step": 1657
    },
    {
      "epoch": 0.4852209540532631,
      "grad_norm": 0.005863872356712818,
      "learning_rate": 3.7869476148668426e-05,
      "loss": 0.0001,
      "step": 1658
    },
    {
      "epoch": 0.485513608428446,
      "grad_norm": 0.02286364510655403,
      "learning_rate": 3.7862159789288854e-05,
      "loss": 0.0006,
      "step": 1659
    },
    {
      "epoch": 0.48580626280362893,
      "grad_norm": 0.017278209328651428,
      "learning_rate": 3.785484342990928e-05,
      "loss": 0.0003,
      "step": 1660
    },
    {
      "epoch": 0.4860989171788118,
      "grad_norm": 9.417853355407715,
      "learning_rate": 3.78475270705297e-05,
      "loss": 0.0752,
      "step": 1661
    },
    {
      "epoch": 0.4863915715539947,
      "grad_norm": 0.08602503687143326,
      "learning_rate": 3.784021071115013e-05,
      "loss": 0.0004,
      "step": 1662
    },
    {
      "epoch": 0.48668422592917765,
      "grad_norm": 0.05982265621423721,
      "learning_rate": 3.783289435177056e-05,
      "loss": 0.0003,
      "step": 1663
    },
    {
      "epoch": 0.48697688030436054,
      "grad_norm": 5.121674060821533,
      "learning_rate": 3.782557799239099e-05,
      "loss": 0.0837,
      "step": 1664
    },
    {
      "epoch": 0.4872695346795435,
      "grad_norm": 0.00907017569988966,
      "learning_rate": 3.7818261633011415e-05,
      "loss": 0.0003,
      "step": 1665
    },
    {
      "epoch": 0.48756218905472637,
      "grad_norm": 0.10281817615032196,
      "learning_rate": 3.781094527363184e-05,
      "loss": 0.0006,
      "step": 1666
    },
    {
      "epoch": 0.48785484342990926,
      "grad_norm": 2.749037504196167,
      "learning_rate": 3.780362891425227e-05,
      "loss": 0.0626,
      "step": 1667
    },
    {
      "epoch": 0.4881474978050922,
      "grad_norm": 1.7417001724243164,
      "learning_rate": 3.77963125548727e-05,
      "loss": 0.0048,
      "step": 1668
    },
    {
      "epoch": 0.4884401521802751,
      "grad_norm": 0.02646813727915287,
      "learning_rate": 3.7788996195493127e-05,
      "loss": 0.0003,
      "step": 1669
    },
    {
      "epoch": 0.488732806555458,
      "grad_norm": 0.040591128170490265,
      "learning_rate": 3.7781679836113555e-05,
      "loss": 0.0008,
      "step": 1670
    },
    {
      "epoch": 0.4890254609306409,
      "grad_norm": 0.09096373617649078,
      "learning_rate": 3.777436347673398e-05,
      "loss": 0.0014,
      "step": 1671
    },
    {
      "epoch": 0.4893181153058238,
      "grad_norm": 0.06142628937959671,
      "learning_rate": 3.7767047117354404e-05,
      "loss": 0.001,
      "step": 1672
    },
    {
      "epoch": 0.48961076968100675,
      "grad_norm": 0.011314328759908676,
      "learning_rate": 3.775973075797483e-05,
      "loss": 0.0002,
      "step": 1673
    },
    {
      "epoch": 0.48990342405618964,
      "grad_norm": 8.231575012207031,
      "learning_rate": 3.775241439859526e-05,
      "loss": 0.093,
      "step": 1674
    },
    {
      "epoch": 0.49019607843137253,
      "grad_norm": 0.314681738615036,
      "learning_rate": 3.774509803921569e-05,
      "loss": 0.0042,
      "step": 1675
    },
    {
      "epoch": 0.49048873280655547,
      "grad_norm": 0.04359268397092819,
      "learning_rate": 3.7737781679836115e-05,
      "loss": 0.0008,
      "step": 1676
    },
    {
      "epoch": 0.49078138718173836,
      "grad_norm": 1.3855690956115723,
      "learning_rate": 3.773046532045654e-05,
      "loss": 0.0126,
      "step": 1677
    },
    {
      "epoch": 0.4910740415569213,
      "grad_norm": 4.025027751922607,
      "learning_rate": 3.772314896107697e-05,
      "loss": 0.0629,
      "step": 1678
    },
    {
      "epoch": 0.4913666959321042,
      "grad_norm": 0.0015903118764981627,
      "learning_rate": 3.77158326016974e-05,
      "loss": 0.0,
      "step": 1679
    },
    {
      "epoch": 0.4916593503072871,
      "grad_norm": 0.015470651909708977,
      "learning_rate": 3.770851624231783e-05,
      "loss": 0.0003,
      "step": 1680
    },
    {
      "epoch": 0.49195200468247,
      "grad_norm": 0.005371852777898312,
      "learning_rate": 3.7701199882938255e-05,
      "loss": 0.0001,
      "step": 1681
    },
    {
      "epoch": 0.4922446590576529,
      "grad_norm": 0.02501101791858673,
      "learning_rate": 3.7693883523558676e-05,
      "loss": 0.0003,
      "step": 1682
    },
    {
      "epoch": 0.4925373134328358,
      "grad_norm": 0.01687714457511902,
      "learning_rate": 3.7686567164179104e-05,
      "loss": 0.0003,
      "step": 1683
    },
    {
      "epoch": 0.49282996780801874,
      "grad_norm": 0.009274466894567013,
      "learning_rate": 3.767925080479953e-05,
      "loss": 0.0002,
      "step": 1684
    },
    {
      "epoch": 0.49312262218320163,
      "grad_norm": 0.008989389054477215,
      "learning_rate": 3.767193444541996e-05,
      "loss": 0.0002,
      "step": 1685
    },
    {
      "epoch": 0.4934152765583846,
      "grad_norm": 0.02986481972038746,
      "learning_rate": 3.766461808604039e-05,
      "loss": 0.0004,
      "step": 1686
    },
    {
      "epoch": 0.49370793093356746,
      "grad_norm": 0.0024271649308502674,
      "learning_rate": 3.7657301726660816e-05,
      "loss": 0.0001,
      "step": 1687
    },
    {
      "epoch": 0.49400058530875035,
      "grad_norm": 0.14183920621871948,
      "learning_rate": 3.7649985367281244e-05,
      "loss": 0.0005,
      "step": 1688
    },
    {
      "epoch": 0.4942932396839333,
      "grad_norm": 0.12646184861660004,
      "learning_rate": 3.764266900790167e-05,
      "loss": 0.001,
      "step": 1689
    },
    {
      "epoch": 0.4945858940591162,
      "grad_norm": 0.18008200824260712,
      "learning_rate": 3.76353526485221e-05,
      "loss": 0.0007,
      "step": 1690
    },
    {
      "epoch": 0.49487854843429907,
      "grad_norm": 2.948591709136963,
      "learning_rate": 3.762803628914253e-05,
      "loss": 0.0058,
      "step": 1691
    },
    {
      "epoch": 0.495171202809482,
      "grad_norm": 3.9043712615966797,
      "learning_rate": 3.7620719929762956e-05,
      "loss": 0.2148,
      "step": 1692
    },
    {
      "epoch": 0.4954638571846649,
      "grad_norm": 0.014103816822171211,
      "learning_rate": 3.761340357038338e-05,
      "loss": 0.0002,
      "step": 1693
    },
    {
      "epoch": 0.49575651155984785,
      "grad_norm": 0.047968197613954544,
      "learning_rate": 3.7606087211003805e-05,
      "loss": 0.0006,
      "step": 1694
    },
    {
      "epoch": 0.49604916593503073,
      "grad_norm": 0.008447016589343548,
      "learning_rate": 3.759877085162423e-05,
      "loss": 0.0002,
      "step": 1695
    },
    {
      "epoch": 0.4963418203102136,
      "grad_norm": 0.004565671551972628,
      "learning_rate": 3.759145449224466e-05,
      "loss": 0.0001,
      "step": 1696
    },
    {
      "epoch": 0.49663447468539657,
      "grad_norm": 0.009304909966886044,
      "learning_rate": 3.758413813286509e-05,
      "loss": 0.0001,
      "step": 1697
    },
    {
      "epoch": 0.49692712906057945,
      "grad_norm": 3.502692699432373,
      "learning_rate": 3.757682177348552e-05,
      "loss": 0.2776,
      "step": 1698
    },
    {
      "epoch": 0.49721978343576234,
      "grad_norm": 0.012169056572020054,
      "learning_rate": 3.7569505414105945e-05,
      "loss": 0.0001,
      "step": 1699
    },
    {
      "epoch": 0.4975124378109453,
      "grad_norm": 0.0030158180743455887,
      "learning_rate": 3.756218905472637e-05,
      "loss": 0.0001,
      "step": 1700
    },
    {
      "epoch": 0.4978050921861282,
      "grad_norm": 0.013154564425349236,
      "learning_rate": 3.75548726953468e-05,
      "loss": 0.0003,
      "step": 1701
    },
    {
      "epoch": 0.4980977465613111,
      "grad_norm": 0.001938572502695024,
      "learning_rate": 3.754755633596723e-05,
      "loss": 0.0,
      "step": 1702
    },
    {
      "epoch": 0.498390400936494,
      "grad_norm": 0.02460942603647709,
      "learning_rate": 3.7540239976587657e-05,
      "loss": 0.0002,
      "step": 1703
    },
    {
      "epoch": 0.4986830553116769,
      "grad_norm": 8.89484691619873,
      "learning_rate": 3.753292361720808e-05,
      "loss": 0.0953,
      "step": 1704
    },
    {
      "epoch": 0.49897570968685984,
      "grad_norm": 0.3271154463291168,
      "learning_rate": 3.7525607257828506e-05,
      "loss": 0.0018,
      "step": 1705
    },
    {
      "epoch": 0.4992683640620427,
      "grad_norm": 0.03161986917257309,
      "learning_rate": 3.7518290898448934e-05,
      "loss": 0.0005,
      "step": 1706
    },
    {
      "epoch": 0.4995610184372256,
      "grad_norm": 0.9015589952468872,
      "learning_rate": 3.751097453906936e-05,
      "loss": 0.0048,
      "step": 1707
    },
    {
      "epoch": 0.49985367281240856,
      "grad_norm": 0.00638006255030632,
      "learning_rate": 3.750365817968979e-05,
      "loss": 0.0002,
      "step": 1708
    },
    {
      "epoch": 0.5001463271875914,
      "grad_norm": 4.401444435119629,
      "learning_rate": 3.749634182031022e-05,
      "loss": 0.0155,
      "step": 1709
    },
    {
      "epoch": 0.5004389815627743,
      "grad_norm": 0.011124534532427788,
      "learning_rate": 3.7489025460930645e-05,
      "loss": 0.0002,
      "step": 1710
    },
    {
      "epoch": 0.5007316359379572,
      "grad_norm": 0.10676395893096924,
      "learning_rate": 3.748170910155107e-05,
      "loss": 0.0009,
      "step": 1711
    },
    {
      "epoch": 0.5010242903131402,
      "grad_norm": 0.010841317474842072,
      "learning_rate": 3.74743927421715e-05,
      "loss": 0.0002,
      "step": 1712
    },
    {
      "epoch": 0.5013169446883231,
      "grad_norm": 0.37011250853538513,
      "learning_rate": 3.746707638279193e-05,
      "loss": 0.0024,
      "step": 1713
    },
    {
      "epoch": 0.501609599063506,
      "grad_norm": 4.9328107833862305,
      "learning_rate": 3.745976002341235e-05,
      "loss": 0.0161,
      "step": 1714
    },
    {
      "epoch": 0.5019022534386889,
      "grad_norm": 0.00960032269358635,
      "learning_rate": 3.745244366403278e-05,
      "loss": 0.0002,
      "step": 1715
    },
    {
      "epoch": 0.5021949078138718,
      "grad_norm": 0.14403264224529266,
      "learning_rate": 3.7445127304653206e-05,
      "loss": 0.0024,
      "step": 1716
    },
    {
      "epoch": 0.5024875621890548,
      "grad_norm": 0.0046923803165555,
      "learning_rate": 3.7437810945273634e-05,
      "loss": 0.0001,
      "step": 1717
    },
    {
      "epoch": 0.5027802165642377,
      "grad_norm": 0.003075518412515521,
      "learning_rate": 3.743049458589406e-05,
      "loss": 0.0001,
      "step": 1718
    },
    {
      "epoch": 0.5030728709394205,
      "grad_norm": 0.020254552364349365,
      "learning_rate": 3.742317822651449e-05,
      "loss": 0.0003,
      "step": 1719
    },
    {
      "epoch": 0.5033655253146034,
      "grad_norm": 0.009212339296936989,
      "learning_rate": 3.741586186713492e-05,
      "loss": 0.0001,
      "step": 1720
    },
    {
      "epoch": 0.5036581796897863,
      "grad_norm": 0.015108101069927216,
      "learning_rate": 3.7408545507755346e-05,
      "loss": 0.0003,
      "step": 1721
    },
    {
      "epoch": 0.5039508340649693,
      "grad_norm": 0.0037316512316465378,
      "learning_rate": 3.7401229148375774e-05,
      "loss": 0.0001,
      "step": 1722
    },
    {
      "epoch": 0.5042434884401522,
      "grad_norm": 0.20514242351055145,
      "learning_rate": 3.73939127889962e-05,
      "loss": 0.0013,
      "step": 1723
    },
    {
      "epoch": 0.5045361428153351,
      "grad_norm": 0.0022379914298653603,
      "learning_rate": 3.738659642961663e-05,
      "loss": 0.0001,
      "step": 1724
    },
    {
      "epoch": 0.504828797190518,
      "grad_norm": 0.0064598931930959225,
      "learning_rate": 3.737928007023705e-05,
      "loss": 0.0002,
      "step": 1725
    },
    {
      "epoch": 0.5051214515657009,
      "grad_norm": 0.01024326216429472,
      "learning_rate": 3.737196371085748e-05,
      "loss": 0.0002,
      "step": 1726
    },
    {
      "epoch": 0.5054141059408838,
      "grad_norm": 0.004229737911373377,
      "learning_rate": 3.736464735147791e-05,
      "loss": 0.0001,
      "step": 1727
    },
    {
      "epoch": 0.5057067603160668,
      "grad_norm": 0.0021727506536990404,
      "learning_rate": 3.7357330992098335e-05,
      "loss": 0.0,
      "step": 1728
    },
    {
      "epoch": 0.5059994146912496,
      "grad_norm": 8.523039817810059,
      "learning_rate": 3.735001463271876e-05,
      "loss": 0.0721,
      "step": 1729
    },
    {
      "epoch": 0.5062920690664325,
      "grad_norm": 0.0025433769915252924,
      "learning_rate": 3.734269827333919e-05,
      "loss": 0.0001,
      "step": 1730
    },
    {
      "epoch": 0.5065847234416154,
      "grad_norm": 0.0037780781276524067,
      "learning_rate": 3.733538191395962e-05,
      "loss": 0.0001,
      "step": 1731
    },
    {
      "epoch": 0.5068773778167983,
      "grad_norm": 0.0531008280813694,
      "learning_rate": 3.732806555458005e-05,
      "loss": 0.0005,
      "step": 1732
    },
    {
      "epoch": 0.5071700321919813,
      "grad_norm": 0.006564264185726643,
      "learning_rate": 3.7320749195200475e-05,
      "loss": 0.0001,
      "step": 1733
    },
    {
      "epoch": 0.5074626865671642,
      "grad_norm": 0.5764566659927368,
      "learning_rate": 3.73134328358209e-05,
      "loss": 0.0028,
      "step": 1734
    },
    {
      "epoch": 0.5077553409423471,
      "grad_norm": 0.001747317728586495,
      "learning_rate": 3.730611647644133e-05,
      "loss": 0.0,
      "step": 1735
    },
    {
      "epoch": 0.50804799531753,
      "grad_norm": 0.005131816025823355,
      "learning_rate": 3.729880011706175e-05,
      "loss": 0.0001,
      "step": 1736
    },
    {
      "epoch": 0.5083406496927129,
      "grad_norm": 0.2529681324958801,
      "learning_rate": 3.729148375768218e-05,
      "loss": 0.0011,
      "step": 1737
    },
    {
      "epoch": 0.5086333040678959,
      "grad_norm": 0.00605887221172452,
      "learning_rate": 3.728416739830261e-05,
      "loss": 0.0001,
      "step": 1738
    },
    {
      "epoch": 0.5089259584430788,
      "grad_norm": 0.15838930010795593,
      "learning_rate": 3.7276851038923036e-05,
      "loss": 0.001,
      "step": 1739
    },
    {
      "epoch": 0.5092186128182616,
      "grad_norm": 0.008716394193470478,
      "learning_rate": 3.7269534679543464e-05,
      "loss": 0.0001,
      "step": 1740
    },
    {
      "epoch": 0.5095112671934445,
      "grad_norm": 0.10244045406579971,
      "learning_rate": 3.726221832016389e-05,
      "loss": 0.0006,
      "step": 1741
    },
    {
      "epoch": 0.5098039215686274,
      "grad_norm": 0.014128678478300571,
      "learning_rate": 3.725490196078432e-05,
      "loss": 0.0002,
      "step": 1742
    },
    {
      "epoch": 0.5100965759438104,
      "grad_norm": 0.024465791881084442,
      "learning_rate": 3.724758560140475e-05,
      "loss": 0.0004,
      "step": 1743
    },
    {
      "epoch": 0.5103892303189933,
      "grad_norm": 0.10913330316543579,
      "learning_rate": 3.724026924202517e-05,
      "loss": 0.0007,
      "step": 1744
    },
    {
      "epoch": 0.5106818846941762,
      "grad_norm": 0.01726347953081131,
      "learning_rate": 3.7232952882645597e-05,
      "loss": 0.0002,
      "step": 1745
    },
    {
      "epoch": 0.5109745390693591,
      "grad_norm": 0.08208633214235306,
      "learning_rate": 3.7225636523266025e-05,
      "loss": 0.0008,
      "step": 1746
    },
    {
      "epoch": 0.511267193444542,
      "grad_norm": 10.661100387573242,
      "learning_rate": 3.721832016388645e-05,
      "loss": 0.0384,
      "step": 1747
    },
    {
      "epoch": 0.5115598478197249,
      "grad_norm": 0.0018042756710201502,
      "learning_rate": 3.721100380450688e-05,
      "loss": 0.0,
      "step": 1748
    },
    {
      "epoch": 0.5118525021949079,
      "grad_norm": 0.014229393564164639,
      "learning_rate": 3.720368744512731e-05,
      "loss": 0.0001,
      "step": 1749
    },
    {
      "epoch": 0.5121451565700907,
      "grad_norm": 0.002377843949943781,
      "learning_rate": 3.7196371085747736e-05,
      "loss": 0.0001,
      "step": 1750
    },
    {
      "epoch": 0.5124378109452736,
      "grad_norm": 0.08071793615818024,
      "learning_rate": 3.7189054726368164e-05,
      "loss": 0.0004,
      "step": 1751
    },
    {
      "epoch": 0.5127304653204565,
      "grad_norm": 0.0018894653767347336,
      "learning_rate": 3.7181738366988585e-05,
      "loss": 0.0,
      "step": 1752
    },
    {
      "epoch": 0.5130231196956394,
      "grad_norm": 0.0011113358195871115,
      "learning_rate": 3.7174422007609013e-05,
      "loss": 0.0,
      "step": 1753
    },
    {
      "epoch": 0.5133157740708224,
      "grad_norm": 0.002135826740413904,
      "learning_rate": 3.716710564822944e-05,
      "loss": 0.0001,
      "step": 1754
    },
    {
      "epoch": 0.5136084284460053,
      "grad_norm": 0.00445591239258647,
      "learning_rate": 3.715978928884987e-05,
      "loss": 0.0001,
      "step": 1755
    },
    {
      "epoch": 0.5139010828211882,
      "grad_norm": 0.0021990472450852394,
      "learning_rate": 3.71524729294703e-05,
      "loss": 0.0,
      "step": 1756
    },
    {
      "epoch": 0.5141937371963711,
      "grad_norm": 0.11762271821498871,
      "learning_rate": 3.7145156570090725e-05,
      "loss": 0.0004,
      "step": 1757
    },
    {
      "epoch": 0.514486391571554,
      "grad_norm": 6.813816547393799,
      "learning_rate": 3.713784021071115e-05,
      "loss": 0.096,
      "step": 1758
    },
    {
      "epoch": 0.514779045946737,
      "grad_norm": 0.007990210317075253,
      "learning_rate": 3.713052385133158e-05,
      "loss": 0.0001,
      "step": 1759
    },
    {
      "epoch": 0.5150717003219198,
      "grad_norm": 0.0036788741126656532,
      "learning_rate": 3.7123207491952e-05,
      "loss": 0.0001,
      "step": 1760
    },
    {
      "epoch": 0.5153643546971027,
      "grad_norm": 0.0008727388922125101,
      "learning_rate": 3.711589113257243e-05,
      "loss": 0.0,
      "step": 1761
    },
    {
      "epoch": 0.5156570090722856,
      "grad_norm": 0.0029930968303233385,
      "learning_rate": 3.710857477319286e-05,
      "loss": 0.0001,
      "step": 1762
    },
    {
      "epoch": 0.5159496634474685,
      "grad_norm": 0.013158080168068409,
      "learning_rate": 3.7101258413813286e-05,
      "loss": 0.0001,
      "step": 1763
    },
    {
      "epoch": 0.5162423178226514,
      "grad_norm": 0.001070450060069561,
      "learning_rate": 3.7093942054433714e-05,
      "loss": 0.0,
      "step": 1764
    },
    {
      "epoch": 0.5165349721978344,
      "grad_norm": 0.03361155837774277,
      "learning_rate": 3.708662569505414e-05,
      "loss": 0.0002,
      "step": 1765
    },
    {
      "epoch": 0.5168276265730173,
      "grad_norm": 0.044139012694358826,
      "learning_rate": 3.707930933567457e-05,
      "loss": 0.0005,
      "step": 1766
    },
    {
      "epoch": 0.5171202809482002,
      "grad_norm": 0.004443437326699495,
      "learning_rate": 3.7071992976295e-05,
      "loss": 0.0001,
      "step": 1767
    },
    {
      "epoch": 0.5174129353233831,
      "grad_norm": 0.01623067446053028,
      "learning_rate": 3.706467661691542e-05,
      "loss": 0.0002,
      "step": 1768
    },
    {
      "epoch": 0.517705589698566,
      "grad_norm": 0.0013135349145159125,
      "learning_rate": 3.705736025753585e-05,
      "loss": 0.0,
      "step": 1769
    },
    {
      "epoch": 0.517998244073749,
      "grad_norm": 0.006052326411008835,
      "learning_rate": 3.7050043898156275e-05,
      "loss": 0.0001,
      "step": 1770
    },
    {
      "epoch": 0.5182908984489318,
      "grad_norm": 0.015305147506296635,
      "learning_rate": 3.70427275387767e-05,
      "loss": 0.0002,
      "step": 1771
    },
    {
      "epoch": 0.5185835528241147,
      "grad_norm": 0.0007871999987401068,
      "learning_rate": 3.703541117939713e-05,
      "loss": 0.0,
      "step": 1772
    },
    {
      "epoch": 0.5188762071992976,
      "grad_norm": 0.005125640891492367,
      "learning_rate": 3.702809482001756e-05,
      "loss": 0.0001,
      "step": 1773
    },
    {
      "epoch": 0.5191688615744805,
      "grad_norm": 0.004210739862173796,
      "learning_rate": 3.702077846063799e-05,
      "loss": 0.0001,
      "step": 1774
    },
    {
      "epoch": 0.5194615159496635,
      "grad_norm": 8.280128479003906,
      "learning_rate": 3.7013462101258415e-05,
      "loss": 0.0457,
      "step": 1775
    },
    {
      "epoch": 0.5197541703248464,
      "grad_norm": 9.250022888183594,
      "learning_rate": 3.700614574187884e-05,
      "loss": 0.0734,
      "step": 1776
    },
    {
      "epoch": 0.5200468247000293,
      "grad_norm": 6.377782344818115,
      "learning_rate": 3.699882938249927e-05,
      "loss": 0.1639,
      "step": 1777
    },
    {
      "epoch": 0.5203394790752122,
      "grad_norm": 0.008695557713508606,
      "learning_rate": 3.699151302311969e-05,
      "loss": 0.0001,
      "step": 1778
    },
    {
      "epoch": 0.520632133450395,
      "grad_norm": 4.113627910614014,
      "learning_rate": 3.698419666374012e-05,
      "loss": 0.0101,
      "step": 1779
    },
    {
      "epoch": 0.5209247878255779,
      "grad_norm": 0.01942027173936367,
      "learning_rate": 3.697688030436055e-05,
      "loss": 0.0002,
      "step": 1780
    },
    {
      "epoch": 0.5212174422007609,
      "grad_norm": 0.0015956436982378364,
      "learning_rate": 3.6969563944980976e-05,
      "loss": 0.0,
      "step": 1781
    },
    {
      "epoch": 0.5215100965759438,
      "grad_norm": 0.004826758522540331,
      "learning_rate": 3.6962247585601404e-05,
      "loss": 0.0001,
      "step": 1782
    },
    {
      "epoch": 0.5218027509511267,
      "grad_norm": 0.006228056736290455,
      "learning_rate": 3.695493122622183e-05,
      "loss": 0.0001,
      "step": 1783
    },
    {
      "epoch": 0.5220954053263096,
      "grad_norm": 0.0038263732567429543,
      "learning_rate": 3.694761486684226e-05,
      "loss": 0.0001,
      "step": 1784
    },
    {
      "epoch": 0.5223880597014925,
      "grad_norm": 0.0545361191034317,
      "learning_rate": 3.694029850746269e-05,
      "loss": 0.0006,
      "step": 1785
    },
    {
      "epoch": 0.5226807140766755,
      "grad_norm": 6.925477027893066,
      "learning_rate": 3.6932982148083115e-05,
      "loss": 0.1366,
      "step": 1786
    },
    {
      "epoch": 0.5229733684518584,
      "grad_norm": 0.03318082541227341,
      "learning_rate": 3.692566578870354e-05,
      "loss": 0.0003,
      "step": 1787
    },
    {
      "epoch": 0.5232660228270413,
      "grad_norm": 0.02219540998339653,
      "learning_rate": 3.691834942932397e-05,
      "loss": 0.0003,
      "step": 1788
    },
    {
      "epoch": 0.5235586772022242,
      "grad_norm": 0.10277865082025528,
      "learning_rate": 3.691103306994439e-05,
      "loss": 0.0005,
      "step": 1789
    },
    {
      "epoch": 0.523851331577407,
      "grad_norm": 0.007709465455263853,
      "learning_rate": 3.690371671056482e-05,
      "loss": 0.0001,
      "step": 1790
    },
    {
      "epoch": 0.52414398595259,
      "grad_norm": 0.003918229136615992,
      "learning_rate": 3.689640035118525e-05,
      "loss": 0.0001,
      "step": 1791
    },
    {
      "epoch": 0.5244366403277729,
      "grad_norm": 0.7273104190826416,
      "learning_rate": 3.6889083991805676e-05,
      "loss": 0.0058,
      "step": 1792
    },
    {
      "epoch": 0.5247292947029558,
      "grad_norm": 0.02427494153380394,
      "learning_rate": 3.6881767632426104e-05,
      "loss": 0.0002,
      "step": 1793
    },
    {
      "epoch": 0.5250219490781387,
      "grad_norm": 0.3889729082584381,
      "learning_rate": 3.687445127304653e-05,
      "loss": 0.0017,
      "step": 1794
    },
    {
      "epoch": 0.5253146034533216,
      "grad_norm": 0.30237486958503723,
      "learning_rate": 3.686713491366696e-05,
      "loss": 0.0016,
      "step": 1795
    },
    {
      "epoch": 0.5256072578285045,
      "grad_norm": 0.003748288843780756,
      "learning_rate": 3.685981855428739e-05,
      "loss": 0.0001,
      "step": 1796
    },
    {
      "epoch": 0.5258999122036875,
      "grad_norm": 9.525371551513672,
      "learning_rate": 3.6852502194907816e-05,
      "loss": 0.0617,
      "step": 1797
    },
    {
      "epoch": 0.5261925665788704,
      "grad_norm": 0.034219879657030106,
      "learning_rate": 3.6845185835528244e-05,
      "loss": 0.0003,
      "step": 1798
    },
    {
      "epoch": 0.5264852209540533,
      "grad_norm": 0.052397776395082474,
      "learning_rate": 3.6837869476148665e-05,
      "loss": 0.0003,
      "step": 1799
    },
    {
      "epoch": 0.5267778753292361,
      "grad_norm": 0.0034943039063364267,
      "learning_rate": 3.683055311676909e-05,
      "loss": 0.0001,
      "step": 1800
    },
    {
      "epoch": 0.527070529704419,
      "grad_norm": 0.00827542133629322,
      "learning_rate": 3.682323675738952e-05,
      "loss": 0.0001,
      "step": 1801
    },
    {
      "epoch": 0.527363184079602,
      "grad_norm": 0.06492964178323746,
      "learning_rate": 3.681592039800995e-05,
      "loss": 0.0004,
      "step": 1802
    },
    {
      "epoch": 0.5276558384547849,
      "grad_norm": 0.06064360961318016,
      "learning_rate": 3.680860403863038e-05,
      "loss": 0.0004,
      "step": 1803
    },
    {
      "epoch": 0.5279484928299678,
      "grad_norm": 0.03688051551580429,
      "learning_rate": 3.6801287679250805e-05,
      "loss": 0.0003,
      "step": 1804
    },
    {
      "epoch": 0.5282411472051507,
      "grad_norm": 0.0004697742697317153,
      "learning_rate": 3.679397131987123e-05,
      "loss": 0.0,
      "step": 1805
    },
    {
      "epoch": 0.5285338015803336,
      "grad_norm": 0.03380119800567627,
      "learning_rate": 3.678665496049166e-05,
      "loss": 0.0002,
      "step": 1806
    },
    {
      "epoch": 0.5288264559555166,
      "grad_norm": 0.0014248295919969678,
      "learning_rate": 3.677933860111209e-05,
      "loss": 0.0,
      "step": 1807
    },
    {
      "epoch": 0.5291191103306995,
      "grad_norm": 0.0007322686142288148,
      "learning_rate": 3.677202224173252e-05,
      "loss": 0.0,
      "step": 1808
    },
    {
      "epoch": 0.5294117647058824,
      "grad_norm": 0.01017748098820448,
      "learning_rate": 3.6764705882352945e-05,
      "loss": 0.0001,
      "step": 1809
    },
    {
      "epoch": 0.5297044190810652,
      "grad_norm": 0.08894390612840652,
      "learning_rate": 3.6757389522973366e-05,
      "loss": 0.0004,
      "step": 1810
    },
    {
      "epoch": 0.5299970734562481,
      "grad_norm": 0.002421896904706955,
      "learning_rate": 3.6750073163593794e-05,
      "loss": 0.0,
      "step": 1811
    },
    {
      "epoch": 0.5302897278314311,
      "grad_norm": 0.0010194090427830815,
      "learning_rate": 3.674275680421422e-05,
      "loss": 0.0,
      "step": 1812
    },
    {
      "epoch": 0.530582382206614,
      "grad_norm": 0.0037550644483417273,
      "learning_rate": 3.673544044483465e-05,
      "loss": 0.0001,
      "step": 1813
    },
    {
      "epoch": 0.5308750365817969,
      "grad_norm": 0.006121156271547079,
      "learning_rate": 3.672812408545508e-05,
      "loss": 0.0001,
      "step": 1814
    },
    {
      "epoch": 0.5311676909569798,
      "grad_norm": 0.0014041484100744128,
      "learning_rate": 3.6720807726075506e-05,
      "loss": 0.0,
      "step": 1815
    },
    {
      "epoch": 0.5314603453321627,
      "grad_norm": 0.0009004389285109937,
      "learning_rate": 3.6713491366695934e-05,
      "loss": 0.0,
      "step": 1816
    },
    {
      "epoch": 0.5317529997073456,
      "grad_norm": 0.1951526403427124,
      "learning_rate": 3.670617500731636e-05,
      "loss": 0.0008,
      "step": 1817
    },
    {
      "epoch": 0.5320456540825286,
      "grad_norm": 0.0010145616251975298,
      "learning_rate": 3.669885864793679e-05,
      "loss": 0.0,
      "step": 1818
    },
    {
      "epoch": 0.5323383084577115,
      "grad_norm": 0.0022146685514599085,
      "learning_rate": 3.669154228855722e-05,
      "loss": 0.0,
      "step": 1819
    },
    {
      "epoch": 0.5326309628328944,
      "grad_norm": 0.0035102490801364183,
      "learning_rate": 3.668422592917764e-05,
      "loss": 0.0,
      "step": 1820
    },
    {
      "epoch": 0.5329236172080772,
      "grad_norm": 0.006640933454036713,
      "learning_rate": 3.6676909569798067e-05,
      "loss": 0.0001,
      "step": 1821
    },
    {
      "epoch": 0.5332162715832601,
      "grad_norm": 0.010873553343117237,
      "learning_rate": 3.6669593210418495e-05,
      "loss": 0.0001,
      "step": 1822
    },
    {
      "epoch": 0.5335089259584431,
      "grad_norm": 0.5120856761932373,
      "learning_rate": 3.666227685103892e-05,
      "loss": 0.0017,
      "step": 1823
    },
    {
      "epoch": 0.533801580333626,
      "grad_norm": 0.0015129977837204933,
      "learning_rate": 3.665496049165935e-05,
      "loss": 0.0,
      "step": 1824
    },
    {
      "epoch": 0.5340942347088089,
      "grad_norm": 0.023982318118214607,
      "learning_rate": 3.664764413227978e-05,
      "loss": 0.0002,
      "step": 1825
    },
    {
      "epoch": 0.5343868890839918,
      "grad_norm": 0.007661939598619938,
      "learning_rate": 3.6640327772900206e-05,
      "loss": 0.0001,
      "step": 1826
    },
    {
      "epoch": 0.5346795434591747,
      "grad_norm": 0.0005000335513614118,
      "learning_rate": 3.6633011413520634e-05,
      "loss": 0.0,
      "step": 1827
    },
    {
      "epoch": 0.5349721978343577,
      "grad_norm": 0.0013710459461435676,
      "learning_rate": 3.662569505414106e-05,
      "loss": 0.0,
      "step": 1828
    },
    {
      "epoch": 0.5352648522095406,
      "grad_norm": 0.009977270849049091,
      "learning_rate": 3.661837869476149e-05,
      "loss": 0.0001,
      "step": 1829
    },
    {
      "epoch": 0.5355575065847235,
      "grad_norm": 0.0027754006441682577,
      "learning_rate": 3.661106233538192e-05,
      "loss": 0.0,
      "step": 1830
    },
    {
      "epoch": 0.5358501609599063,
      "grad_norm": 0.0007236430537886918,
      "learning_rate": 3.660374597600234e-05,
      "loss": 0.0,
      "step": 1831
    },
    {
      "epoch": 0.5361428153350892,
      "grad_norm": 0.0009558421443216503,
      "learning_rate": 3.659642961662277e-05,
      "loss": 0.0,
      "step": 1832
    },
    {
      "epoch": 0.5364354697102721,
      "grad_norm": 0.00018310759332962334,
      "learning_rate": 3.6589113257243195e-05,
      "loss": 0.0,
      "step": 1833
    },
    {
      "epoch": 0.5367281240854551,
      "grad_norm": 0.00036073499359190464,
      "learning_rate": 3.658179689786362e-05,
      "loss": 0.0,
      "step": 1834
    },
    {
      "epoch": 0.537020778460638,
      "grad_norm": 5.54606819152832,
      "learning_rate": 3.657448053848405e-05,
      "loss": 0.0191,
      "step": 1835
    },
    {
      "epoch": 0.5373134328358209,
      "grad_norm": 0.000820028712041676,
      "learning_rate": 3.656716417910448e-05,
      "loss": 0.0,
      "step": 1836
    },
    {
      "epoch": 0.5376060872110038,
      "grad_norm": 0.000855633057653904,
      "learning_rate": 3.655984781972491e-05,
      "loss": 0.0,
      "step": 1837
    },
    {
      "epoch": 0.5378987415861867,
      "grad_norm": 0.0016642031259834766,
      "learning_rate": 3.6552531460345335e-05,
      "loss": 0.0,
      "step": 1838
    },
    {
      "epoch": 0.5381913959613697,
      "grad_norm": 0.0009357350063510239,
      "learning_rate": 3.654521510096576e-05,
      "loss": 0.0,
      "step": 1839
    },
    {
      "epoch": 0.5384840503365526,
      "grad_norm": 0.0006593746365979314,
      "learning_rate": 3.653789874158619e-05,
      "loss": 0.0,
      "step": 1840
    },
    {
      "epoch": 0.5387767047117354,
      "grad_norm": 0.06413095444440842,
      "learning_rate": 3.653058238220662e-05,
      "loss": 0.0004,
      "step": 1841
    },
    {
      "epoch": 0.5390693590869183,
      "grad_norm": 0.003475102363154292,
      "learning_rate": 3.652326602282704e-05,
      "loss": 0.0,
      "step": 1842
    },
    {
      "epoch": 0.5393620134621012,
      "grad_norm": 0.00027950009098276496,
      "learning_rate": 3.651594966344747e-05,
      "loss": 0.0,
      "step": 1843
    },
    {
      "epoch": 0.5396546678372842,
      "grad_norm": 0.0063551911152899265,
      "learning_rate": 3.6508633304067896e-05,
      "loss": 0.0001,
      "step": 1844
    },
    {
      "epoch": 0.5399473222124671,
      "grad_norm": 0.0005005158018320799,
      "learning_rate": 3.6501316944688324e-05,
      "loss": 0.0,
      "step": 1845
    },
    {
      "epoch": 0.54023997658765,
      "grad_norm": 0.001076875370927155,
      "learning_rate": 3.649400058530875e-05,
      "loss": 0.0,
      "step": 1846
    },
    {
      "epoch": 0.5405326309628329,
      "grad_norm": 0.00039572734385728836,
      "learning_rate": 3.648668422592918e-05,
      "loss": 0.0,
      "step": 1847
    },
    {
      "epoch": 0.5408252853380158,
      "grad_norm": 0.0002550368953961879,
      "learning_rate": 3.647936786654961e-05,
      "loss": 0.0,
      "step": 1848
    },
    {
      "epoch": 0.5411179397131987,
      "grad_norm": 0.001221882994286716,
      "learning_rate": 3.6472051507170036e-05,
      "loss": 0.0,
      "step": 1849
    },
    {
      "epoch": 0.5414105940883817,
      "grad_norm": 0.0008454864728264511,
      "learning_rate": 3.6464735147790464e-05,
      "loss": 0.0,
      "step": 1850
    },
    {
      "epoch": 0.5417032484635645,
      "grad_norm": 9.2111382400617e-05,
      "learning_rate": 3.645741878841089e-05,
      "loss": 0.0,
      "step": 1851
    },
    {
      "epoch": 0.5419959028387474,
      "grad_norm": 8.438952445983887,
      "learning_rate": 3.645010242903131e-05,
      "loss": 0.2633,
      "step": 1852
    },
    {
      "epoch": 0.5422885572139303,
      "grad_norm": 0.001004392164759338,
      "learning_rate": 3.644278606965174e-05,
      "loss": 0.0,
      "step": 1853
    },
    {
      "epoch": 0.5425812115891132,
      "grad_norm": 0.00137075234670192,
      "learning_rate": 3.643546971027217e-05,
      "loss": 0.0,
      "step": 1854
    },
    {
      "epoch": 0.5428738659642962,
      "grad_norm": 0.008688335306942463,
      "learning_rate": 3.6428153350892597e-05,
      "loss": 0.0001,
      "step": 1855
    },
    {
      "epoch": 0.5431665203394791,
      "grad_norm": 0.0057664671912789345,
      "learning_rate": 3.6420836991513025e-05,
      "loss": 0.0001,
      "step": 1856
    },
    {
      "epoch": 0.543459174714662,
      "grad_norm": 0.00026519360835663974,
      "learning_rate": 3.641352063213345e-05,
      "loss": 0.0,
      "step": 1857
    },
    {
      "epoch": 0.5437518290898449,
      "grad_norm": 5.946147441864014,
      "learning_rate": 3.640620427275388e-05,
      "loss": 0.0278,
      "step": 1858
    },
    {
      "epoch": 0.5440444834650278,
      "grad_norm": 0.0016452086856588721,
      "learning_rate": 3.639888791337431e-05,
      "loss": 0.0,
      "step": 1859
    },
    {
      "epoch": 0.5443371378402108,
      "grad_norm": 0.001900315866805613,
      "learning_rate": 3.6391571553994736e-05,
      "loss": 0.0,
      "step": 1860
    },
    {
      "epoch": 0.5446297922153936,
      "grad_norm": 0.001699809799902141,
      "learning_rate": 3.6384255194615164e-05,
      "loss": 0.0,
      "step": 1861
    },
    {
      "epoch": 0.5449224465905765,
      "grad_norm": 0.004050768446177244,
      "learning_rate": 3.637693883523559e-05,
      "loss": 0.0001,
      "step": 1862
    },
    {
      "epoch": 0.5452151009657594,
      "grad_norm": 0.0055578239262104034,
      "learning_rate": 3.6369622475856013e-05,
      "loss": 0.0001,
      "step": 1863
    },
    {
      "epoch": 0.5455077553409423,
      "grad_norm": 0.002650769893079996,
      "learning_rate": 3.636230611647644e-05,
      "loss": 0.0,
      "step": 1864
    },
    {
      "epoch": 0.5458004097161252,
      "grad_norm": 0.006541771348565817,
      "learning_rate": 3.635498975709687e-05,
      "loss": 0.0001,
      "step": 1865
    },
    {
      "epoch": 0.5460930640913082,
      "grad_norm": 0.005475026089698076,
      "learning_rate": 3.63476733977173e-05,
      "loss": 0.0001,
      "step": 1866
    },
    {
      "epoch": 0.5463857184664911,
      "grad_norm": 0.014294413849711418,
      "learning_rate": 3.6340357038337725e-05,
      "loss": 0.0002,
      "step": 1867
    },
    {
      "epoch": 0.546678372841674,
      "grad_norm": 5.496011257171631,
      "learning_rate": 3.633304067895815e-05,
      "loss": 0.2543,
      "step": 1868
    },
    {
      "epoch": 0.5469710272168569,
      "grad_norm": 0.007919173687696457,
      "learning_rate": 3.632572431957858e-05,
      "loss": 0.0001,
      "step": 1869
    },
    {
      "epoch": 0.5472636815920398,
      "grad_norm": 0.06703829765319824,
      "learning_rate": 3.631840796019901e-05,
      "loss": 0.0007,
      "step": 1870
    },
    {
      "epoch": 0.5475563359672228,
      "grad_norm": 0.01958063803613186,
      "learning_rate": 3.631109160081944e-05,
      "loss": 0.0004,
      "step": 1871
    },
    {
      "epoch": 0.5478489903424056,
      "grad_norm": 0.009955674409866333,
      "learning_rate": 3.6303775241439865e-05,
      "loss": 0.0002,
      "step": 1872
    },
    {
      "epoch": 0.5481416447175885,
      "grad_norm": 0.013306597247719765,
      "learning_rate": 3.6296458882060286e-05,
      "loss": 0.0003,
      "step": 1873
    },
    {
      "epoch": 0.5484342990927714,
      "grad_norm": 0.005448292475193739,
      "learning_rate": 3.6289142522680714e-05,
      "loss": 0.0001,
      "step": 1874
    },
    {
      "epoch": 0.5487269534679543,
      "grad_norm": 0.020972304046154022,
      "learning_rate": 3.628182616330114e-05,
      "loss": 0.0004,
      "step": 1875
    },
    {
      "epoch": 0.5490196078431373,
      "grad_norm": 0.013444705866277218,
      "learning_rate": 3.627450980392157e-05,
      "loss": 0.0002,
      "step": 1876
    },
    {
      "epoch": 0.5493122622183202,
      "grad_norm": 0.005087513942271471,
      "learning_rate": 3.6267193444542e-05,
      "loss": 0.0001,
      "step": 1877
    },
    {
      "epoch": 0.5496049165935031,
      "grad_norm": 0.007403940428048372,
      "learning_rate": 3.6259877085162426e-05,
      "loss": 0.0001,
      "step": 1878
    },
    {
      "epoch": 0.549897570968686,
      "grad_norm": 0.0029638975393027067,
      "learning_rate": 3.6252560725782854e-05,
      "loss": 0.0001,
      "step": 1879
    },
    {
      "epoch": 0.5501902253438689,
      "grad_norm": 0.0048151337541639805,
      "learning_rate": 3.624524436640328e-05,
      "loss": 0.0001,
      "step": 1880
    },
    {
      "epoch": 0.5504828797190519,
      "grad_norm": 0.008215599693357944,
      "learning_rate": 3.623792800702371e-05,
      "loss": 0.0002,
      "step": 1881
    },
    {
      "epoch": 0.5507755340942347,
      "grad_norm": 0.0034256833605468273,
      "learning_rate": 3.623061164764414e-05,
      "loss": 0.0001,
      "step": 1882
    },
    {
      "epoch": 0.5510681884694176,
      "grad_norm": 0.007427630480378866,
      "learning_rate": 3.6223295288264566e-05,
      "loss": 0.0001,
      "step": 1883
    },
    {
      "epoch": 0.5513608428446005,
      "grad_norm": 0.01656688190996647,
      "learning_rate": 3.621597892888499e-05,
      "loss": 0.0001,
      "step": 1884
    },
    {
      "epoch": 0.5516534972197834,
      "grad_norm": 0.005414238199591637,
      "learning_rate": 3.6208662569505415e-05,
      "loss": 0.0001,
      "step": 1885
    },
    {
      "epoch": 0.5519461515949663,
      "grad_norm": 0.002928486093878746,
      "learning_rate": 3.620134621012584e-05,
      "loss": 0.0001,
      "step": 1886
    },
    {
      "epoch": 0.5522388059701493,
      "grad_norm": 0.004565770737826824,
      "learning_rate": 3.619402985074627e-05,
      "loss": 0.0001,
      "step": 1887
    },
    {
      "epoch": 0.5525314603453322,
      "grad_norm": 0.0021436475217342377,
      "learning_rate": 3.61867134913667e-05,
      "loss": 0.0001,
      "step": 1888
    },
    {
      "epoch": 0.5528241147205151,
      "grad_norm": 0.024198435246944427,
      "learning_rate": 3.6179397131987127e-05,
      "loss": 0.0002,
      "step": 1889
    },
    {
      "epoch": 0.553116769095698,
      "grad_norm": 4.87838888168335,
      "learning_rate": 3.6172080772607554e-05,
      "loss": 0.0229,
      "step": 1890
    },
    {
      "epoch": 0.5534094234708808,
      "grad_norm": 0.12384998053312302,
      "learning_rate": 3.616476441322798e-05,
      "loss": 0.0006,
      "step": 1891
    },
    {
      "epoch": 0.5537020778460638,
      "grad_norm": 0.15898260474205017,
      "learning_rate": 3.615744805384841e-05,
      "loss": 0.0007,
      "step": 1892
    },
    {
      "epoch": 0.5539947322212467,
      "grad_norm": 0.055284690111875534,
      "learning_rate": 3.615013169446884e-05,
      "loss": 0.0004,
      "step": 1893
    },
    {
      "epoch": 0.5542873865964296,
      "grad_norm": 0.002289955737069249,
      "learning_rate": 3.6142815335089266e-05,
      "loss": 0.0001,
      "step": 1894
    },
    {
      "epoch": 0.5545800409716125,
      "grad_norm": 0.0055839489214122295,
      "learning_rate": 3.613549897570969e-05,
      "loss": 0.0001,
      "step": 1895
    },
    {
      "epoch": 0.5548726953467954,
      "grad_norm": 0.040371961891651154,
      "learning_rate": 3.6128182616330115e-05,
      "loss": 0.0004,
      "step": 1896
    },
    {
      "epoch": 0.5551653497219784,
      "grad_norm": 0.006794141139835119,
      "learning_rate": 3.612086625695054e-05,
      "loss": 0.0001,
      "step": 1897
    },
    {
      "epoch": 0.5554580040971613,
      "grad_norm": 0.0622115284204483,
      "learning_rate": 3.611354989757097e-05,
      "loss": 0.0004,
      "step": 1898
    },
    {
      "epoch": 0.5557506584723442,
      "grad_norm": 0.002630019560456276,
      "learning_rate": 3.61062335381914e-05,
      "loss": 0.0001,
      "step": 1899
    },
    {
      "epoch": 0.5560433128475271,
      "grad_norm": 0.0018102923640981317,
      "learning_rate": 3.609891717881183e-05,
      "loss": 0.0,
      "step": 1900
    },
    {
      "epoch": 0.55633596722271,
      "grad_norm": 0.08920388668775558,
      "learning_rate": 3.6091600819432255e-05,
      "loss": 0.0006,
      "step": 1901
    },
    {
      "epoch": 0.5566286215978928,
      "grad_norm": 0.0019470619736239314,
      "learning_rate": 3.608428446005268e-05,
      "loss": 0.0,
      "step": 1902
    },
    {
      "epoch": 0.5569212759730758,
      "grad_norm": 0.005832942668348551,
      "learning_rate": 3.607696810067311e-05,
      "loss": 0.0001,
      "step": 1903
    },
    {
      "epoch": 0.5572139303482587,
      "grad_norm": 5.806044578552246,
      "learning_rate": 3.606965174129354e-05,
      "loss": 0.1801,
      "step": 1904
    },
    {
      "epoch": 0.5575065847234416,
      "grad_norm": 0.005481138359755278,
      "learning_rate": 3.606233538191396e-05,
      "loss": 0.0001,
      "step": 1905
    },
    {
      "epoch": 0.5577992390986245,
      "grad_norm": 0.008103495463728905,
      "learning_rate": 3.605501902253439e-05,
      "loss": 0.0001,
      "step": 1906
    },
    {
      "epoch": 0.5580918934738074,
      "grad_norm": 2.412076473236084,
      "learning_rate": 3.6047702663154816e-05,
      "loss": 0.0053,
      "step": 1907
    },
    {
      "epoch": 0.5583845478489904,
      "grad_norm": 0.002062377519905567,
      "learning_rate": 3.6040386303775244e-05,
      "loss": 0.0001,
      "step": 1908
    },
    {
      "epoch": 0.5586772022241733,
      "grad_norm": 0.00176193134393543,
      "learning_rate": 3.603306994439567e-05,
      "loss": 0.0001,
      "step": 1909
    },
    {
      "epoch": 0.5589698565993562,
      "grad_norm": 0.0025971047580242157,
      "learning_rate": 3.60257535850161e-05,
      "loss": 0.0001,
      "step": 1910
    },
    {
      "epoch": 0.559262510974539,
      "grad_norm": 0.00832816306501627,
      "learning_rate": 3.601843722563653e-05,
      "loss": 0.0002,
      "step": 1911
    },
    {
      "epoch": 0.5595551653497219,
      "grad_norm": 4.229795455932617,
      "learning_rate": 3.6011120866256956e-05,
      "loss": 0.1842,
      "step": 1912
    },
    {
      "epoch": 0.5598478197249049,
      "grad_norm": 0.0056917197071015835,
      "learning_rate": 3.6003804506877384e-05,
      "loss": 0.0001,
      "step": 1913
    },
    {
      "epoch": 0.5601404741000878,
      "grad_norm": 2.5309674739837646,
      "learning_rate": 3.599648814749781e-05,
      "loss": 0.0057,
      "step": 1914
    },
    {
      "epoch": 0.5604331284752707,
      "grad_norm": 0.008329502306878567,
      "learning_rate": 3.598917178811824e-05,
      "loss": 0.0002,
      "step": 1915
    },
    {
      "epoch": 0.5607257828504536,
      "grad_norm": 0.005598751828074455,
      "learning_rate": 3.598185542873866e-05,
      "loss": 0.0001,
      "step": 1916
    },
    {
      "epoch": 0.5610184372256365,
      "grad_norm": 0.00646185502409935,
      "learning_rate": 3.597453906935909e-05,
      "loss": 0.0001,
      "step": 1917
    },
    {
      "epoch": 0.5613110916008194,
      "grad_norm": 0.006866334471851587,
      "learning_rate": 3.596722270997952e-05,
      "loss": 0.0002,
      "step": 1918
    },
    {
      "epoch": 0.5616037459760024,
      "grad_norm": 0.0034606242552399635,
      "learning_rate": 3.5959906350599945e-05,
      "loss": 0.0001,
      "step": 1919
    },
    {
      "epoch": 0.5618964003511853,
      "grad_norm": 0.004204215481877327,
      "learning_rate": 3.595258999122037e-05,
      "loss": 0.0001,
      "step": 1920
    },
    {
      "epoch": 0.5621890547263682,
      "grad_norm": 0.006527795922011137,
      "learning_rate": 3.59452736318408e-05,
      "loss": 0.0001,
      "step": 1921
    },
    {
      "epoch": 0.562481709101551,
      "grad_norm": 0.08269781619310379,
      "learning_rate": 3.593795727246123e-05,
      "loss": 0.0005,
      "step": 1922
    },
    {
      "epoch": 0.5627743634767339,
      "grad_norm": 0.0049555618315935135,
      "learning_rate": 3.5930640913081657e-05,
      "loss": 0.0001,
      "step": 1923
    },
    {
      "epoch": 0.5630670178519169,
      "grad_norm": 0.17137527465820312,
      "learning_rate": 3.5923324553702084e-05,
      "loss": 0.0011,
      "step": 1924
    },
    {
      "epoch": 0.5633596722270998,
      "grad_norm": 0.03199278190732002,
      "learning_rate": 3.591600819432251e-05,
      "loss": 0.0003,
      "step": 1925
    },
    {
      "epoch": 0.5636523266022827,
      "grad_norm": 0.021704381331801414,
      "learning_rate": 3.5908691834942934e-05,
      "loss": 0.0003,
      "step": 1926
    },
    {
      "epoch": 0.5639449809774656,
      "grad_norm": 0.0018218193436041474,
      "learning_rate": 3.590137547556336e-05,
      "loss": 0.0,
      "step": 1927
    },
    {
      "epoch": 0.5642376353526485,
      "grad_norm": 1.291187047958374,
      "learning_rate": 3.589405911618379e-05,
      "loss": 0.0039,
      "step": 1928
    },
    {
      "epoch": 0.5645302897278315,
      "grad_norm": 4.635223865509033,
      "learning_rate": 3.588674275680422e-05,
      "loss": 0.2324,
      "step": 1929
    },
    {
      "epoch": 0.5648229441030144,
      "grad_norm": 0.0054092868231236935,
      "learning_rate": 3.5879426397424645e-05,
      "loss": 0.0001,
      "step": 1930
    },
    {
      "epoch": 0.5651155984781973,
      "grad_norm": 0.008922097273170948,
      "learning_rate": 3.587211003804507e-05,
      "loss": 0.0002,
      "step": 1931
    },
    {
      "epoch": 0.5654082528533801,
      "grad_norm": 0.002301436150446534,
      "learning_rate": 3.58647936786655e-05,
      "loss": 0.0001,
      "step": 1932
    },
    {
      "epoch": 0.565700907228563,
      "grad_norm": 0.005360117182135582,
      "learning_rate": 3.585747731928593e-05,
      "loss": 0.0001,
      "step": 1933
    },
    {
      "epoch": 0.5659935616037459,
      "grad_norm": 0.012410847470164299,
      "learning_rate": 3.585016095990635e-05,
      "loss": 0.0003,
      "step": 1934
    },
    {
      "epoch": 0.5662862159789289,
      "grad_norm": 0.12954729795455933,
      "learning_rate": 3.584284460052678e-05,
      "loss": 0.002,
      "step": 1935
    },
    {
      "epoch": 0.5665788703541118,
      "grad_norm": 0.04091344401240349,
      "learning_rate": 3.5835528241147206e-05,
      "loss": 0.0007,
      "step": 1936
    },
    {
      "epoch": 0.5668715247292947,
      "grad_norm": 0.11257024854421616,
      "learning_rate": 3.5828211881767634e-05,
      "loss": 0.0011,
      "step": 1937
    },
    {
      "epoch": 0.5671641791044776,
      "grad_norm": 0.07596535235643387,
      "learning_rate": 3.582089552238806e-05,
      "loss": 0.0015,
      "step": 1938
    },
    {
      "epoch": 0.5674568334796605,
      "grad_norm": 0.14624670147895813,
      "learning_rate": 3.581357916300849e-05,
      "loss": 0.0018,
      "step": 1939
    },
    {
      "epoch": 0.5677494878548435,
      "grad_norm": 0.23475168645381927,
      "learning_rate": 3.580626280362892e-05,
      "loss": 0.0019,
      "step": 1940
    },
    {
      "epoch": 0.5680421422300264,
      "grad_norm": 0.02123354561626911,
      "learning_rate": 3.5798946444249346e-05,
      "loss": 0.0003,
      "step": 1941
    },
    {
      "epoch": 0.5683347966052092,
      "grad_norm": 0.09904786199331284,
      "learning_rate": 3.579163008486977e-05,
      "loss": 0.001,
      "step": 1942
    },
    {
      "epoch": 0.5686274509803921,
      "grad_norm": 7.794208526611328,
      "learning_rate": 3.5784313725490195e-05,
      "loss": 0.1359,
      "step": 1943
    },
    {
      "epoch": 0.568920105355575,
      "grad_norm": 0.014828276820480824,
      "learning_rate": 3.577699736611062e-05,
      "loss": 0.0003,
      "step": 1944
    },
    {
      "epoch": 0.569212759730758,
      "grad_norm": 0.08800917863845825,
      "learning_rate": 3.576968100673105e-05,
      "loss": 0.0012,
      "step": 1945
    },
    {
      "epoch": 0.5695054141059409,
      "grad_norm": 0.02050822600722313,
      "learning_rate": 3.576236464735148e-05,
      "loss": 0.0003,
      "step": 1946
    },
    {
      "epoch": 0.5697980684811238,
      "grad_norm": 0.006806317251175642,
      "learning_rate": 3.575504828797191e-05,
      "loss": 0.0002,
      "step": 1947
    },
    {
      "epoch": 0.5700907228563067,
      "grad_norm": 0.008568843826651573,
      "learning_rate": 3.5747731928592335e-05,
      "loss": 0.0002,
      "step": 1948
    },
    {
      "epoch": 0.5703833772314896,
      "grad_norm": 0.003100211266428232,
      "learning_rate": 3.574041556921276e-05,
      "loss": 0.0001,
      "step": 1949
    },
    {
      "epoch": 0.5706760316066726,
      "grad_norm": 0.14013731479644775,
      "learning_rate": 3.5733099209833184e-05,
      "loss": 0.0006,
      "step": 1950
    },
    {
      "epoch": 0.5709686859818555,
      "grad_norm": 0.13786007463932037,
      "learning_rate": 3.572578285045361e-05,
      "loss": 0.0017,
      "step": 1951
    },
    {
      "epoch": 0.5712613403570383,
      "grad_norm": 0.0029935319907963276,
      "learning_rate": 3.571846649107404e-05,
      "loss": 0.0001,
      "step": 1952
    },
    {
      "epoch": 0.5715539947322212,
      "grad_norm": 1.9697563648223877,
      "learning_rate": 3.571115013169447e-05,
      "loss": 0.2066,
      "step": 1953
    },
    {
      "epoch": 0.5718466491074041,
      "grad_norm": 0.024167198687791824,
      "learning_rate": 3.5703833772314896e-05,
      "loss": 0.0003,
      "step": 1954
    },
    {
      "epoch": 0.572139303482587,
      "grad_norm": 0.005826764740049839,
      "learning_rate": 3.5696517412935324e-05,
      "loss": 0.0001,
      "step": 1955
    },
    {
      "epoch": 0.57243195785777,
      "grad_norm": 1.85420823097229,
      "learning_rate": 3.568920105355575e-05,
      "loss": 0.0104,
      "step": 1956
    },
    {
      "epoch": 0.5727246122329529,
      "grad_norm": 0.15664705634117126,
      "learning_rate": 3.568188469417618e-05,
      "loss": 0.0021,
      "step": 1957
    },
    {
      "epoch": 0.5730172666081358,
      "grad_norm": 6.557300090789795,
      "learning_rate": 3.56745683347966e-05,
      "loss": 0.2577,
      "step": 1958
    },
    {
      "epoch": 0.5733099209833187,
      "grad_norm": 0.13520510494709015,
      "learning_rate": 3.566725197541703e-05,
      "loss": 0.0021,
      "step": 1959
    },
    {
      "epoch": 0.5736025753585016,
      "grad_norm": 0.031301479786634445,
      "learning_rate": 3.565993561603746e-05,
      "loss": 0.0005,
      "step": 1960
    },
    {
      "epoch": 0.5738952297336846,
      "grad_norm": 0.015609705820679665,
      "learning_rate": 3.5652619256657885e-05,
      "loss": 0.0004,
      "step": 1961
    },
    {
      "epoch": 0.5741878841088675,
      "grad_norm": 0.052701354026794434,
      "learning_rate": 3.564530289727831e-05,
      "loss": 0.001,
      "step": 1962
    },
    {
      "epoch": 0.5744805384840503,
      "grad_norm": 0.007138526998460293,
      "learning_rate": 3.563798653789874e-05,
      "loss": 0.0002,
      "step": 1963
    },
    {
      "epoch": 0.5747731928592332,
      "grad_norm": 0.059247542172670364,
      "learning_rate": 3.563067017851917e-05,
      "loss": 0.0008,
      "step": 1964
    },
    {
      "epoch": 0.5750658472344161,
      "grad_norm": 0.011258046142756939,
      "learning_rate": 3.5623353819139597e-05,
      "loss": 0.0002,
      "step": 1965
    },
    {
      "epoch": 0.5753585016095991,
      "grad_norm": 0.011694671586155891,
      "learning_rate": 3.5616037459760025e-05,
      "loss": 0.0003,
      "step": 1966
    },
    {
      "epoch": 0.575651155984782,
      "grad_norm": 0.02896781824529171,
      "learning_rate": 3.560872110038045e-05,
      "loss": 0.0006,
      "step": 1967
    },
    {
      "epoch": 0.5759438103599649,
      "grad_norm": 4.3482890129089355,
      "learning_rate": 3.560140474100088e-05,
      "loss": 0.1504,
      "step": 1968
    },
    {
      "epoch": 0.5762364647351478,
      "grad_norm": 0.2739729583263397,
      "learning_rate": 3.55940883816213e-05,
      "loss": 0.0012,
      "step": 1969
    },
    {
      "epoch": 0.5765291191103307,
      "grad_norm": 0.013300075195729733,
      "learning_rate": 3.558677202224173e-05,
      "loss": 0.0003,
      "step": 1970
    },
    {
      "epoch": 0.5768217734855136,
      "grad_norm": 0.9926109910011292,
      "learning_rate": 3.557945566286216e-05,
      "loss": 0.0046,
      "step": 1971
    },
    {
      "epoch": 0.5771144278606966,
      "grad_norm": 0.014598459005355835,
      "learning_rate": 3.5572139303482585e-05,
      "loss": 0.0003,
      "step": 1972
    },
    {
      "epoch": 0.5774070822358794,
      "grad_norm": 0.02771534025669098,
      "learning_rate": 3.556482294410301e-05,
      "loss": 0.0005,
      "step": 1973
    },
    {
      "epoch": 0.5776997366110623,
      "grad_norm": 0.058283593505620956,
      "learning_rate": 3.555750658472344e-05,
      "loss": 0.0011,
      "step": 1974
    },
    {
      "epoch": 0.5779923909862452,
      "grad_norm": 0.9233146905899048,
      "learning_rate": 3.555019022534387e-05,
      "loss": 0.0078,
      "step": 1975
    },
    {
      "epoch": 0.5782850453614281,
      "grad_norm": 0.0344410240650177,
      "learning_rate": 3.55428738659643e-05,
      "loss": 0.0007,
      "step": 1976
    },
    {
      "epoch": 0.5785776997366111,
      "grad_norm": 0.9516710638999939,
      "learning_rate": 3.5535557506584725e-05,
      "loss": 0.0086,
      "step": 1977
    },
    {
      "epoch": 0.578870354111794,
      "grad_norm": 0.01560552790760994,
      "learning_rate": 3.552824114720515e-05,
      "loss": 0.0003,
      "step": 1978
    },
    {
      "epoch": 0.5791630084869769,
      "grad_norm": 0.36015036702156067,
      "learning_rate": 3.552092478782558e-05,
      "loss": 0.0016,
      "step": 1979
    },
    {
      "epoch": 0.5794556628621598,
      "grad_norm": 0.013730215840041637,
      "learning_rate": 3.5513608428446e-05,
      "loss": 0.0003,
      "step": 1980
    },
    {
      "epoch": 0.5797483172373427,
      "grad_norm": 0.182551309466362,
      "learning_rate": 3.550629206906643e-05,
      "loss": 0.0013,
      "step": 1981
    },
    {
      "epoch": 0.5800409716125257,
      "grad_norm": 0.010926664806902409,
      "learning_rate": 3.549897570968686e-05,
      "loss": 0.0002,
      "step": 1982
    },
    {
      "epoch": 0.5803336259877085,
      "grad_norm": 0.00620716018602252,
      "learning_rate": 3.5491659350307286e-05,
      "loss": 0.0001,
      "step": 1983
    },
    {
      "epoch": 0.5806262803628914,
      "grad_norm": 0.010993091389536858,
      "learning_rate": 3.5484342990927714e-05,
      "loss": 0.0002,
      "step": 1984
    },
    {
      "epoch": 0.5809189347380743,
      "grad_norm": 0.0013883326901122928,
      "learning_rate": 3.547702663154814e-05,
      "loss": 0.0,
      "step": 1985
    },
    {
      "epoch": 0.5812115891132572,
      "grad_norm": 0.0055152433924376965,
      "learning_rate": 3.546971027216857e-05,
      "loss": 0.0001,
      "step": 1986
    },
    {
      "epoch": 0.5815042434884401,
      "grad_norm": 7.650897026062012,
      "learning_rate": 3.5462393912789e-05,
      "loss": 0.0135,
      "step": 1987
    },
    {
      "epoch": 0.5817968978636231,
      "grad_norm": 0.0018448525806888938,
      "learning_rate": 3.5455077553409426e-05,
      "loss": 0.0001,
      "step": 1988
    },
    {
      "epoch": 0.582089552238806,
      "grad_norm": 0.007545731961727142,
      "learning_rate": 3.5447761194029854e-05,
      "loss": 0.0001,
      "step": 1989
    },
    {
      "epoch": 0.5823822066139889,
      "grad_norm": 0.002951526315882802,
      "learning_rate": 3.5440444834650275e-05,
      "loss": 0.0001,
      "step": 1990
    },
    {
      "epoch": 0.5826748609891718,
      "grad_norm": 0.006255722139030695,
      "learning_rate": 3.54331284752707e-05,
      "loss": 0.0002,
      "step": 1991
    },
    {
      "epoch": 0.5829675153643546,
      "grad_norm": 0.011105085723102093,
      "learning_rate": 3.542581211589113e-05,
      "loss": 0.0002,
      "step": 1992
    },
    {
      "epoch": 0.5832601697395376,
      "grad_norm": 0.05295855924487114,
      "learning_rate": 3.541849575651156e-05,
      "loss": 0.0006,
      "step": 1993
    },
    {
      "epoch": 0.5835528241147205,
      "grad_norm": 0.3890467584133148,
      "learning_rate": 3.541117939713199e-05,
      "loss": 0.0014,
      "step": 1994
    },
    {
      "epoch": 0.5838454784899034,
      "grad_norm": 0.10936211794614792,
      "learning_rate": 3.5403863037752415e-05,
      "loss": 0.0011,
      "step": 1995
    },
    {
      "epoch": 0.5841381328650863,
      "grad_norm": 0.0033165684435516596,
      "learning_rate": 3.539654667837284e-05,
      "loss": 0.0001,
      "step": 1996
    },
    {
      "epoch": 0.5844307872402692,
      "grad_norm": 0.1402590274810791,
      "learning_rate": 3.538923031899327e-05,
      "loss": 0.0007,
      "step": 1997
    },
    {
      "epoch": 0.5847234416154522,
      "grad_norm": 0.003403761889785528,
      "learning_rate": 3.53819139596137e-05,
      "loss": 0.0001,
      "step": 1998
    },
    {
      "epoch": 0.5850160959906351,
      "grad_norm": 0.010263781994581223,
      "learning_rate": 3.5374597600234127e-05,
      "loss": 0.0001,
      "step": 1999
    },
    {
      "epoch": 0.585308750365818,
      "grad_norm": 0.001706862822175026,
      "learning_rate": 3.5367281240854554e-05,
      "loss": 0.0,
      "step": 2000
    },
    {
      "epoch": 0.5856014047410009,
      "grad_norm": 0.03570732846856117,
      "learning_rate": 3.5359964881474976e-05,
      "loss": 0.0003,
      "step": 2001
    },
    {
      "epoch": 0.5858940591161838,
      "grad_norm": 0.0018397400854155421,
      "learning_rate": 3.5352648522095404e-05,
      "loss": 0.0,
      "step": 2002
    },
    {
      "epoch": 0.5861867134913668,
      "grad_norm": 0.0006071400130167603,
      "learning_rate": 3.534533216271583e-05,
      "loss": 0.0,
      "step": 2003
    },
    {
      "epoch": 0.5864793678665496,
      "grad_norm": 0.012687459588050842,
      "learning_rate": 3.533801580333626e-05,
      "loss": 0.0002,
      "step": 2004
    },
    {
      "epoch": 0.5867720222417325,
      "grad_norm": 0.0034372718073427677,
      "learning_rate": 3.533069944395669e-05,
      "loss": 0.0001,
      "step": 2005
    },
    {
      "epoch": 0.5870646766169154,
      "grad_norm": 0.00016561755910515785,
      "learning_rate": 3.5323383084577115e-05,
      "loss": 0.0,
      "step": 2006
    },
    {
      "epoch": 0.5873573309920983,
      "grad_norm": 0.006079351529479027,
      "learning_rate": 3.531606672519754e-05,
      "loss": 0.0001,
      "step": 2007
    },
    {
      "epoch": 0.5876499853672812,
      "grad_norm": 0.3710552752017975,
      "learning_rate": 3.530875036581797e-05,
      "loss": 0.0008,
      "step": 2008
    },
    {
      "epoch": 0.5879426397424642,
      "grad_norm": 0.0008314733277074993,
      "learning_rate": 3.53014340064384e-05,
      "loss": 0.0,
      "step": 2009
    },
    {
      "epoch": 0.5882352941176471,
      "grad_norm": 0.023041611537337303,
      "learning_rate": 3.529411764705883e-05,
      "loss": 0.0001,
      "step": 2010
    },
    {
      "epoch": 0.58852794849283,
      "grad_norm": 0.0008120948914438486,
      "learning_rate": 3.528680128767925e-05,
      "loss": 0.0,
      "step": 2011
    },
    {
      "epoch": 0.5888206028680129,
      "grad_norm": 0.0011115546803921461,
      "learning_rate": 3.5279484928299676e-05,
      "loss": 0.0,
      "step": 2012
    },
    {
      "epoch": 0.5891132572431957,
      "grad_norm": 0.009538223035633564,
      "learning_rate": 3.5272168568920104e-05,
      "loss": 0.0002,
      "step": 2013
    },
    {
      "epoch": 0.5894059116183787,
      "grad_norm": 0.007257380057126284,
      "learning_rate": 3.526485220954053e-05,
      "loss": 0.0001,
      "step": 2014
    },
    {
      "epoch": 0.5896985659935616,
      "grad_norm": 0.0024914678651839495,
      "learning_rate": 3.525753585016096e-05,
      "loss": 0.0001,
      "step": 2015
    },
    {
      "epoch": 0.5899912203687445,
      "grad_norm": 0.004597298800945282,
      "learning_rate": 3.525021949078139e-05,
      "loss": 0.0001,
      "step": 2016
    },
    {
      "epoch": 0.5902838747439274,
      "grad_norm": 0.0012197772739455104,
      "learning_rate": 3.5242903131401816e-05,
      "loss": 0.0,
      "step": 2017
    },
    {
      "epoch": 0.5905765291191103,
      "grad_norm": 0.0014564783778041601,
      "learning_rate": 3.5235586772022244e-05,
      "loss": 0.0,
      "step": 2018
    },
    {
      "epoch": 0.5908691834942933,
      "grad_norm": 0.000955626368522644,
      "learning_rate": 3.522827041264267e-05,
      "loss": 0.0,
      "step": 2019
    },
    {
      "epoch": 0.5911618378694762,
      "grad_norm": 0.0003709029115270823,
      "learning_rate": 3.52209540532631e-05,
      "loss": 0.0,
      "step": 2020
    },
    {
      "epoch": 0.5914544922446591,
      "grad_norm": 0.11591051518917084,
      "learning_rate": 3.521363769388353e-05,
      "loss": 0.0005,
      "step": 2021
    },
    {
      "epoch": 0.591747146619842,
      "grad_norm": 0.001540387631393969,
      "learning_rate": 3.520632133450395e-05,
      "loss": 0.0,
      "step": 2022
    },
    {
      "epoch": 0.5920398009950248,
      "grad_norm": 1.1781518459320068,
      "learning_rate": 3.519900497512438e-05,
      "loss": 0.0042,
      "step": 2023
    },
    {
      "epoch": 0.5923324553702077,
      "grad_norm": 0.0009852898074313998,
      "learning_rate": 3.5191688615744805e-05,
      "loss": 0.0,
      "step": 2024
    },
    {
      "epoch": 0.5926251097453907,
      "grad_norm": 0.0017510764300823212,
      "learning_rate": 3.518437225636523e-05,
      "loss": 0.0,
      "step": 2025
    },
    {
      "epoch": 0.5929177641205736,
      "grad_norm": 0.004155490547418594,
      "learning_rate": 3.517705589698566e-05,
      "loss": 0.0001,
      "step": 2026
    },
    {
      "epoch": 0.5932104184957565,
      "grad_norm": 0.08749844878911972,
      "learning_rate": 3.516973953760609e-05,
      "loss": 0.0005,
      "step": 2027
    },
    {
      "epoch": 0.5935030728709394,
      "grad_norm": 0.0029032984748482704,
      "learning_rate": 3.516242317822652e-05,
      "loss": 0.0,
      "step": 2028
    },
    {
      "epoch": 0.5937957272461223,
      "grad_norm": 0.0009475258993916214,
      "learning_rate": 3.5155106818846945e-05,
      "loss": 0.0,
      "step": 2029
    },
    {
      "epoch": 0.5940883816213053,
      "grad_norm": 0.0036558397114276886,
      "learning_rate": 3.514779045946737e-05,
      "loss": 0.0,
      "step": 2030
    },
    {
      "epoch": 0.5943810359964882,
      "grad_norm": 0.0013729785569012165,
      "learning_rate": 3.51404741000878e-05,
      "loss": 0.0,
      "step": 2031
    },
    {
      "epoch": 0.5946736903716711,
      "grad_norm": 0.06336381286382675,
      "learning_rate": 3.513315774070823e-05,
      "loss": 0.0002,
      "step": 2032
    },
    {
      "epoch": 0.594966344746854,
      "grad_norm": 0.23235172033309937,
      "learning_rate": 3.512584138132865e-05,
      "loss": 0.0011,
      "step": 2033
    },
    {
      "epoch": 0.5952589991220368,
      "grad_norm": 0.08274620771408081,
      "learning_rate": 3.511852502194908e-05,
      "loss": 0.0004,
      "step": 2034
    },
    {
      "epoch": 0.5955516534972198,
      "grad_norm": 11.089838981628418,
      "learning_rate": 3.5111208662569506e-05,
      "loss": 0.0176,
      "step": 2035
    },
    {
      "epoch": 0.5958443078724027,
      "grad_norm": 0.001164097455330193,
      "learning_rate": 3.5103892303189934e-05,
      "loss": 0.0,
      "step": 2036
    },
    {
      "epoch": 0.5961369622475856,
      "grad_norm": 0.0018230958376079798,
      "learning_rate": 3.509657594381036e-05,
      "loss": 0.0,
      "step": 2037
    },
    {
      "epoch": 0.5964296166227685,
      "grad_norm": 0.0010525623802095652,
      "learning_rate": 3.508925958443079e-05,
      "loss": 0.0,
      "step": 2038
    },
    {
      "epoch": 0.5967222709979514,
      "grad_norm": 0.000734026194550097,
      "learning_rate": 3.508194322505122e-05,
      "loss": 0.0,
      "step": 2039
    },
    {
      "epoch": 0.5970149253731343,
      "grad_norm": 0.12918123602867126,
      "learning_rate": 3.5074626865671645e-05,
      "loss": 0.0003,
      "step": 2040
    },
    {
      "epoch": 0.5973075797483173,
      "grad_norm": 0.1338168829679489,
      "learning_rate": 3.506731050629207e-05,
      "loss": 0.0005,
      "step": 2041
    },
    {
      "epoch": 0.5976002341235002,
      "grad_norm": 0.0012683144304901361,
      "learning_rate": 3.50599941469125e-05,
      "loss": 0.0,
      "step": 2042
    },
    {
      "epoch": 0.597892888498683,
      "grad_norm": 0.0012570394901558757,
      "learning_rate": 3.505267778753292e-05,
      "loss": 0.0,
      "step": 2043
    },
    {
      "epoch": 0.5981855428738659,
      "grad_norm": 0.00037313997745513916,
      "learning_rate": 3.504536142815335e-05,
      "loss": 0.0,
      "step": 2044
    },
    {
      "epoch": 0.5984781972490488,
      "grad_norm": 0.0002580972795840353,
      "learning_rate": 3.503804506877378e-05,
      "loss": 0.0,
      "step": 2045
    },
    {
      "epoch": 0.5987708516242318,
      "grad_norm": 0.0012548977974802256,
      "learning_rate": 3.5030728709394206e-05,
      "loss": 0.0,
      "step": 2046
    },
    {
      "epoch": 0.5990635059994147,
      "grad_norm": 0.00040712079498916864,
      "learning_rate": 3.5023412350014634e-05,
      "loss": 0.0,
      "step": 2047
    },
    {
      "epoch": 0.5993561603745976,
      "grad_norm": 0.0012564021162688732,
      "learning_rate": 3.501609599063506e-05,
      "loss": 0.0,
      "step": 2048
    },
    {
      "epoch": 0.5996488147497805,
      "grad_norm": 0.0002876602520700544,
      "learning_rate": 3.500877963125549e-05,
      "loss": 0.0,
      "step": 2049
    },
    {
      "epoch": 0.5999414691249634,
      "grad_norm": 0.00044737564167007804,
      "learning_rate": 3.500146327187592e-05,
      "loss": 0.0,
      "step": 2050
    },
    {
      "epoch": 0.6002341235001464,
      "grad_norm": 25.57400894165039,
      "learning_rate": 3.4994146912496346e-05,
      "loss": 0.0318,
      "step": 2051
    },
    {
      "epoch": 0.6005267778753293,
      "grad_norm": 0.0019366320921108127,
      "learning_rate": 3.4986830553116774e-05,
      "loss": 0.0,
      "step": 2052
    },
    {
      "epoch": 0.6008194322505122,
      "grad_norm": 0.032917775213718414,
      "learning_rate": 3.49795141937372e-05,
      "loss": 0.0001,
      "step": 2053
    },
    {
      "epoch": 0.601112086625695,
      "grad_norm": 0.0001589482999406755,
      "learning_rate": 3.497219783435762e-05,
      "loss": 0.0,
      "step": 2054
    },
    {
      "epoch": 0.6014047410008779,
      "grad_norm": 0.0005509228794835508,
      "learning_rate": 3.496488147497805e-05,
      "loss": 0.0,
      "step": 2055
    },
    {
      "epoch": 0.6016973953760608,
      "grad_norm": 21.30624008178711,
      "learning_rate": 3.495756511559848e-05,
      "loss": 0.0818,
      "step": 2056
    },
    {
      "epoch": 0.6019900497512438,
      "grad_norm": 0.0004659033438656479,
      "learning_rate": 3.495024875621891e-05,
      "loss": 0.0,
      "step": 2057
    },
    {
      "epoch": 0.6022827041264267,
      "grad_norm": 0.0006594470469281077,
      "learning_rate": 3.4942932396839335e-05,
      "loss": 0.0,
      "step": 2058
    },
    {
      "epoch": 0.6025753585016096,
      "grad_norm": 0.003137239022180438,
      "learning_rate": 3.493561603745976e-05,
      "loss": 0.0,
      "step": 2059
    },
    {
      "epoch": 0.6028680128767925,
      "grad_norm": 4.280525207519531,
      "learning_rate": 3.492829967808019e-05,
      "loss": 0.0094,
      "step": 2060
    },
    {
      "epoch": 0.6031606672519754,
      "grad_norm": 0.013040806166827679,
      "learning_rate": 3.492098331870062e-05,
      "loss": 0.0001,
      "step": 2061
    },
    {
      "epoch": 0.6034533216271584,
      "grad_norm": 0.0018619210459291935,
      "learning_rate": 3.491366695932105e-05,
      "loss": 0.0,
      "step": 2062
    },
    {
      "epoch": 0.6037459760023413,
      "grad_norm": 0.004644501954317093,
      "learning_rate": 3.4906350599941475e-05,
      "loss": 0.0,
      "step": 2063
    },
    {
      "epoch": 0.6040386303775241,
      "grad_norm": 0.0015707537531852722,
      "learning_rate": 3.4899034240561896e-05,
      "loss": 0.0,
      "step": 2064
    },
    {
      "epoch": 0.604331284752707,
      "grad_norm": 13.5844144821167,
      "learning_rate": 3.4891717881182324e-05,
      "loss": 0.2497,
      "step": 2065
    },
    {
      "epoch": 0.6046239391278899,
      "grad_norm": 0.007252132520079613,
      "learning_rate": 3.488440152180275e-05,
      "loss": 0.0,
      "step": 2066
    },
    {
      "epoch": 0.6049165935030729,
      "grad_norm": 0.001173017080873251,
      "learning_rate": 3.487708516242318e-05,
      "loss": 0.0,
      "step": 2067
    },
    {
      "epoch": 0.6052092478782558,
      "grad_norm": 0.02849682979285717,
      "learning_rate": 3.486976880304361e-05,
      "loss": 0.0001,
      "step": 2068
    },
    {
      "epoch": 0.6055019022534387,
      "grad_norm": 0.5588051676750183,
      "learning_rate": 3.4862452443664036e-05,
      "loss": 0.0015,
      "step": 2069
    },
    {
      "epoch": 0.6057945566286216,
      "grad_norm": 13.102985382080078,
      "learning_rate": 3.4855136084284464e-05,
      "loss": 0.0956,
      "step": 2070
    },
    {
      "epoch": 0.6060872110038045,
      "grad_norm": 0.00035603917785920203,
      "learning_rate": 3.484781972490489e-05,
      "loss": 0.0,
      "step": 2071
    },
    {
      "epoch": 0.6063798653789875,
      "grad_norm": 2.3206276893615723,
      "learning_rate": 3.484050336552532e-05,
      "loss": 0.0066,
      "step": 2072
    },
    {
      "epoch": 0.6066725197541704,
      "grad_norm": 0.0003959232126362622,
      "learning_rate": 3.483318700614575e-05,
      "loss": 0.0,
      "step": 2073
    },
    {
      "epoch": 0.6069651741293532,
      "grad_norm": 0.00046805141028016806,
      "learning_rate": 3.4825870646766175e-05,
      "loss": 0.0,
      "step": 2074
    },
    {
      "epoch": 0.6072578285045361,
      "grad_norm": 0.041566260159015656,
      "learning_rate": 3.4818554287386597e-05,
      "loss": 0.0002,
      "step": 2075
    },
    {
      "epoch": 0.607550482879719,
      "grad_norm": 0.0005712300189770758,
      "learning_rate": 3.4811237928007024e-05,
      "loss": 0.0,
      "step": 2076
    },
    {
      "epoch": 0.6078431372549019,
      "grad_norm": 0.02407524734735489,
      "learning_rate": 3.480392156862745e-05,
      "loss": 0.0001,
      "step": 2077
    },
    {
      "epoch": 0.6081357916300849,
      "grad_norm": 0.000892152835149318,
      "learning_rate": 3.479660520924788e-05,
      "loss": 0.0,
      "step": 2078
    },
    {
      "epoch": 0.6084284460052678,
      "grad_norm": 7.137636661529541,
      "learning_rate": 3.478928884986831e-05,
      "loss": 0.0089,
      "step": 2079
    },
    {
      "epoch": 0.6087211003804507,
      "grad_norm": 0.0011299810139462352,
      "learning_rate": 3.4781972490488736e-05,
      "loss": 0.0,
      "step": 2080
    },
    {
      "epoch": 0.6090137547556336,
      "grad_norm": 0.0006017218693159521,
      "learning_rate": 3.4774656131109164e-05,
      "loss": 0.0,
      "step": 2081
    },
    {
      "epoch": 0.6093064091308165,
      "grad_norm": 0.0011323533253744245,
      "learning_rate": 3.476733977172959e-05,
      "loss": 0.0,
      "step": 2082
    },
    {
      "epoch": 0.6095990635059995,
      "grad_norm": 0.04937488213181496,
      "learning_rate": 3.476002341235002e-05,
      "loss": 0.0001,
      "step": 2083
    },
    {
      "epoch": 0.6098917178811823,
      "grad_norm": 13.617822647094727,
      "learning_rate": 3.475270705297045e-05,
      "loss": 0.1336,
      "step": 2084
    },
    {
      "epoch": 0.6101843722563652,
      "grad_norm": 0.01302315853536129,
      "learning_rate": 3.4745390693590876e-05,
      "loss": 0.0001,
      "step": 2085
    },
    {
      "epoch": 0.6104770266315481,
      "grad_norm": 0.002732899971306324,
      "learning_rate": 3.47380743342113e-05,
      "loss": 0.0001,
      "step": 2086
    },
    {
      "epoch": 0.610769681006731,
      "grad_norm": 0.09016023576259613,
      "learning_rate": 3.4730757974831725e-05,
      "loss": 0.0004,
      "step": 2087
    },
    {
      "epoch": 0.611062335381914,
      "grad_norm": 0.0027853124774992466,
      "learning_rate": 3.472344161545215e-05,
      "loss": 0.0,
      "step": 2088
    },
    {
      "epoch": 0.6113549897570969,
      "grad_norm": 0.02375302091240883,
      "learning_rate": 3.471612525607258e-05,
      "loss": 0.0001,
      "step": 2089
    },
    {
      "epoch": 0.6116476441322798,
      "grad_norm": 9.565156936645508,
      "learning_rate": 3.470880889669301e-05,
      "loss": 0.0315,
      "step": 2090
    },
    {
      "epoch": 0.6119402985074627,
      "grad_norm": 0.001845332677476108,
      "learning_rate": 3.470149253731344e-05,
      "loss": 0.0,
      "step": 2091
    },
    {
      "epoch": 0.6122329528826456,
      "grad_norm": 0.0012049475917592645,
      "learning_rate": 3.4694176177933865e-05,
      "loss": 0.0,
      "step": 2092
    },
    {
      "epoch": 0.6125256072578285,
      "grad_norm": 0.04229447618126869,
      "learning_rate": 3.468685981855429e-05,
      "loss": 0.0001,
      "step": 2093
    },
    {
      "epoch": 0.6128182616330115,
      "grad_norm": 0.0011197493877261877,
      "learning_rate": 3.467954345917472e-05,
      "loss": 0.0,
      "step": 2094
    },
    {
      "epoch": 0.6131109160081943,
      "grad_norm": 0.0012051331577822566,
      "learning_rate": 3.467222709979515e-05,
      "loss": 0.0,
      "step": 2095
    },
    {
      "epoch": 0.6134035703833772,
      "grad_norm": 0.003435655264183879,
      "learning_rate": 3.466491074041557e-05,
      "loss": 0.0,
      "step": 2096
    },
    {
      "epoch": 0.6136962247585601,
      "grad_norm": 0.0005401496891863644,
      "learning_rate": 3.4657594381036e-05,
      "loss": 0.0,
      "step": 2097
    },
    {
      "epoch": 0.613988879133743,
      "grad_norm": 0.0385284349322319,
      "learning_rate": 3.4650278021656426e-05,
      "loss": 0.0002,
      "step": 2098
    },
    {
      "epoch": 0.614281533508926,
      "grad_norm": 0.004104093182832003,
      "learning_rate": 3.4642961662276854e-05,
      "loss": 0.0,
      "step": 2099
    },
    {
      "epoch": 0.6145741878841089,
      "grad_norm": 0.001205161795951426,
      "learning_rate": 3.463564530289728e-05,
      "loss": 0.0,
      "step": 2100
    },
    {
      "epoch": 0.6148668422592918,
      "grad_norm": 0.0005487053422257304,
      "learning_rate": 3.462832894351771e-05,
      "loss": 0.0,
      "step": 2101
    },
    {
      "epoch": 0.6151594966344747,
      "grad_norm": 0.002301194006577134,
      "learning_rate": 3.462101258413814e-05,
      "loss": 0.0,
      "step": 2102
    },
    {
      "epoch": 0.6154521510096576,
      "grad_norm": 0.06975915282964706,
      "learning_rate": 3.4613696224758566e-05,
      "loss": 0.0003,
      "step": 2103
    },
    {
      "epoch": 0.6157448053848406,
      "grad_norm": 2.4628310203552246,
      "learning_rate": 3.4606379865378994e-05,
      "loss": 0.003,
      "step": 2104
    },
    {
      "epoch": 0.6160374597600234,
      "grad_norm": 0.0005157164996489882,
      "learning_rate": 3.459906350599942e-05,
      "loss": 0.0,
      "step": 2105
    },
    {
      "epoch": 0.6163301141352063,
      "grad_norm": 0.0017777555622160435,
      "learning_rate": 3.459174714661985e-05,
      "loss": 0.0,
      "step": 2106
    },
    {
      "epoch": 0.6166227685103892,
      "grad_norm": 0.0010313241509720683,
      "learning_rate": 3.458443078724027e-05,
      "loss": 0.0,
      "step": 2107
    },
    {
      "epoch": 0.6169154228855721,
      "grad_norm": 0.0005064535071142018,
      "learning_rate": 3.45771144278607e-05,
      "loss": 0.0,
      "step": 2108
    },
    {
      "epoch": 0.617208077260755,
      "grad_norm": 0.0007273271330632269,
      "learning_rate": 3.4569798068481127e-05,
      "loss": 0.0,
      "step": 2109
    },
    {
      "epoch": 0.617500731635938,
      "grad_norm": 0.0013223080895841122,
      "learning_rate": 3.4562481709101554e-05,
      "loss": 0.0,
      "step": 2110
    },
    {
      "epoch": 0.6177933860111209,
      "grad_norm": 0.0034261911641806364,
      "learning_rate": 3.455516534972198e-05,
      "loss": 0.0,
      "step": 2111
    },
    {
      "epoch": 0.6180860403863038,
      "grad_norm": 0.0024893530644476414,
      "learning_rate": 3.454784899034241e-05,
      "loss": 0.0,
      "step": 2112
    },
    {
      "epoch": 0.6183786947614867,
      "grad_norm": 13.454879760742188,
      "learning_rate": 3.454053263096284e-05,
      "loss": 0.0475,
      "step": 2113
    },
    {
      "epoch": 0.6186713491366695,
      "grad_norm": 0.0004851063422393054,
      "learning_rate": 3.4533216271583266e-05,
      "loss": 0.0,
      "step": 2114
    },
    {
      "epoch": 0.6189640035118525,
      "grad_norm": 0.00036992091918364167,
      "learning_rate": 3.452589991220369e-05,
      "loss": 0.0,
      "step": 2115
    },
    {
      "epoch": 0.6192566578870354,
      "grad_norm": 0.0005809810827486217,
      "learning_rate": 3.4518583552824115e-05,
      "loss": 0.0,
      "step": 2116
    },
    {
      "epoch": 0.6195493122622183,
      "grad_norm": 0.0004890324780717492,
      "learning_rate": 3.451126719344454e-05,
      "loss": 0.0,
      "step": 2117
    },
    {
      "epoch": 0.6198419666374012,
      "grad_norm": 0.0031506626401096582,
      "learning_rate": 3.450395083406497e-05,
      "loss": 0.0,
      "step": 2118
    },
    {
      "epoch": 0.6201346210125841,
      "grad_norm": 1.1961182355880737,
      "learning_rate": 3.44966344746854e-05,
      "loss": 0.002,
      "step": 2119
    },
    {
      "epoch": 0.6204272753877671,
      "grad_norm": 0.00019465781224425882,
      "learning_rate": 3.448931811530583e-05,
      "loss": 0.0,
      "step": 2120
    },
    {
      "epoch": 0.62071992976295,
      "grad_norm": 0.0013921204954385757,
      "learning_rate": 3.4482001755926255e-05,
      "loss": 0.0,
      "step": 2121
    },
    {
      "epoch": 0.6210125841381329,
      "grad_norm": 0.00045760799548588693,
      "learning_rate": 3.447468539654668e-05,
      "loss": 0.0,
      "step": 2122
    },
    {
      "epoch": 0.6213052385133158,
      "grad_norm": 0.002929717069491744,
      "learning_rate": 3.4467369037167104e-05,
      "loss": 0.0,
      "step": 2123
    },
    {
      "epoch": 0.6215978928884986,
      "grad_norm": 0.0006322418921627104,
      "learning_rate": 3.446005267778753e-05,
      "loss": 0.0,
      "step": 2124
    },
    {
      "epoch": 0.6218905472636815,
      "grad_norm": 0.00026546447770670056,
      "learning_rate": 3.445273631840796e-05,
      "loss": 0.0,
      "step": 2125
    },
    {
      "epoch": 0.6221832016388645,
      "grad_norm": 0.0008917133673094213,
      "learning_rate": 3.444541995902839e-05,
      "loss": 0.0,
      "step": 2126
    },
    {
      "epoch": 0.6224758560140474,
      "grad_norm": 0.10912005603313446,
      "learning_rate": 3.4438103599648816e-05,
      "loss": 0.0003,
      "step": 2127
    },
    {
      "epoch": 0.6227685103892303,
      "grad_norm": 0.0013562339590862393,
      "learning_rate": 3.4430787240269244e-05,
      "loss": 0.0,
      "step": 2128
    },
    {
      "epoch": 0.6230611647644132,
      "grad_norm": 4.516958713531494,
      "learning_rate": 3.442347088088967e-05,
      "loss": 0.191,
      "step": 2129
    },
    {
      "epoch": 0.6233538191395961,
      "grad_norm": 0.00037854915717616677,
      "learning_rate": 3.44161545215101e-05,
      "loss": 0.0,
      "step": 2130
    },
    {
      "epoch": 0.6236464735147791,
      "grad_norm": 0.00032612564973533154,
      "learning_rate": 3.440883816213052e-05,
      "loss": 0.0,
      "step": 2131
    },
    {
      "epoch": 0.623939127889962,
      "grad_norm": 0.0030479480046778917,
      "learning_rate": 3.440152180275095e-05,
      "loss": 0.0,
      "step": 2132
    },
    {
      "epoch": 0.6242317822651449,
      "grad_norm": 0.0015022482257336378,
      "learning_rate": 3.439420544337138e-05,
      "loss": 0.0,
      "step": 2133
    },
    {
      "epoch": 0.6245244366403278,
      "grad_norm": 0.0004719913122244179,
      "learning_rate": 3.4386889083991805e-05,
      "loss": 0.0,
      "step": 2134
    },
    {
      "epoch": 0.6248170910155106,
      "grad_norm": 0.0012320306850597262,
      "learning_rate": 3.437957272461223e-05,
      "loss": 0.0,
      "step": 2135
    },
    {
      "epoch": 0.6251097453906936,
      "grad_norm": 1.3019874095916748,
      "learning_rate": 3.437225636523266e-05,
      "loss": 0.0043,
      "step": 2136
    },
    {
      "epoch": 0.6254023997658765,
      "grad_norm": 0.004123196937143803,
      "learning_rate": 3.436494000585309e-05,
      "loss": 0.0001,
      "step": 2137
    },
    {
      "epoch": 0.6256950541410594,
      "grad_norm": 0.004008032847195864,
      "learning_rate": 3.435762364647352e-05,
      "loss": 0.0001,
      "step": 2138
    },
    {
      "epoch": 0.6259877085162423,
      "grad_norm": 0.000680704484693706,
      "learning_rate": 3.435030728709394e-05,
      "loss": 0.0,
      "step": 2139
    },
    {
      "epoch": 0.6262803628914252,
      "grad_norm": 0.0005675011198036373,
      "learning_rate": 3.4342990927714366e-05,
      "loss": 0.0,
      "step": 2140
    },
    {
      "epoch": 0.6265730172666082,
      "grad_norm": 0.03600315377116203,
      "learning_rate": 3.4335674568334794e-05,
      "loss": 0.0003,
      "step": 2141
    },
    {
      "epoch": 0.6268656716417911,
      "grad_norm": 0.00038307654904201627,
      "learning_rate": 3.432835820895522e-05,
      "loss": 0.0,
      "step": 2142
    },
    {
      "epoch": 0.627158326016974,
      "grad_norm": 0.0026059714145958424,
      "learning_rate": 3.432104184957565e-05,
      "loss": 0.0,
      "step": 2143
    },
    {
      "epoch": 0.6274509803921569,
      "grad_norm": 0.0010734302923083305,
      "learning_rate": 3.431372549019608e-05,
      "loss": 0.0,
      "step": 2144
    },
    {
      "epoch": 0.6277436347673397,
      "grad_norm": 0.0013862098567187786,
      "learning_rate": 3.4306409130816506e-05,
      "loss": 0.0,
      "step": 2145
    },
    {
      "epoch": 0.6280362891425226,
      "grad_norm": 0.0029694773256778717,
      "learning_rate": 3.4299092771436934e-05,
      "loss": 0.0001,
      "step": 2146
    },
    {
      "epoch": 0.6283289435177056,
      "grad_norm": 0.35501620173454285,
      "learning_rate": 3.429177641205736e-05,
      "loss": 0.0006,
      "step": 2147
    },
    {
      "epoch": 0.6286215978928885,
      "grad_norm": 0.0010110613657161593,
      "learning_rate": 3.428446005267779e-05,
      "loss": 0.0,
      "step": 2148
    },
    {
      "epoch": 0.6289142522680714,
      "grad_norm": 0.003870262997224927,
      "learning_rate": 3.427714369329821e-05,
      "loss": 0.0001,
      "step": 2149
    },
    {
      "epoch": 0.6292069066432543,
      "grad_norm": 0.0006364476284943521,
      "learning_rate": 3.426982733391864e-05,
      "loss": 0.0,
      "step": 2150
    },
    {
      "epoch": 0.6294995610184372,
      "grad_norm": 0.000568551302421838,
      "learning_rate": 3.4262510974539067e-05,
      "loss": 0.0,
      "step": 2151
    },
    {
      "epoch": 0.6297922153936202,
      "grad_norm": 0.016431095078587532,
      "learning_rate": 3.4255194615159495e-05,
      "loss": 0.0001,
      "step": 2152
    },
    {
      "epoch": 0.6300848697688031,
      "grad_norm": 5.688620090484619,
      "learning_rate": 3.424787825577992e-05,
      "loss": 0.1346,
      "step": 2153
    },
    {
      "epoch": 0.630377524143986,
      "grad_norm": 0.0009870333597064018,
      "learning_rate": 3.424056189640035e-05,
      "loss": 0.0,
      "step": 2154
    },
    {
      "epoch": 0.6306701785191688,
      "grad_norm": 0.007537742145359516,
      "learning_rate": 3.423324553702078e-05,
      "loss": 0.0001,
      "step": 2155
    },
    {
      "epoch": 0.6309628328943517,
      "grad_norm": 0.003192983567714691,
      "learning_rate": 3.4225929177641206e-05,
      "loss": 0.0001,
      "step": 2156
    },
    {
      "epoch": 0.6312554872695347,
      "grad_norm": 0.0012039679568260908,
      "learning_rate": 3.4218612818261634e-05,
      "loss": 0.0,
      "step": 2157
    },
    {
      "epoch": 0.6315481416447176,
      "grad_norm": 0.05299927666783333,
      "learning_rate": 3.421129645888206e-05,
      "loss": 0.0005,
      "step": 2158
    },
    {
      "epoch": 0.6318407960199005,
      "grad_norm": 0.11062745749950409,
      "learning_rate": 3.420398009950249e-05,
      "loss": 0.0009,
      "step": 2159
    },
    {
      "epoch": 0.6321334503950834,
      "grad_norm": 0.3257235586643219,
      "learning_rate": 3.419666374012291e-05,
      "loss": 0.002,
      "step": 2160
    },
    {
      "epoch": 0.6324261047702663,
      "grad_norm": 0.0014030158054083586,
      "learning_rate": 3.418934738074334e-05,
      "loss": 0.0,
      "step": 2161
    },
    {
      "epoch": 0.6327187591454492,
      "grad_norm": 4.451821327209473,
      "learning_rate": 3.418203102136377e-05,
      "loss": 0.2209,
      "step": 2162
    },
    {
      "epoch": 0.6330114135206322,
      "grad_norm": 0.053129829466342926,
      "learning_rate": 3.4174714661984195e-05,
      "loss": 0.0003,
      "step": 2163
    },
    {
      "epoch": 0.6333040678958151,
      "grad_norm": 0.002650732407346368,
      "learning_rate": 3.416739830260462e-05,
      "loss": 0.0001,
      "step": 2164
    },
    {
      "epoch": 0.633596722270998,
      "grad_norm": 0.0010525553952902555,
      "learning_rate": 3.416008194322505e-05,
      "loss": 0.0,
      "step": 2165
    },
    {
      "epoch": 0.6338893766461808,
      "grad_norm": 0.0010408986127004027,
      "learning_rate": 3.415276558384548e-05,
      "loss": 0.0,
      "step": 2166
    },
    {
      "epoch": 0.6341820310213637,
      "grad_norm": 0.0022852777037769556,
      "learning_rate": 3.414544922446591e-05,
      "loss": 0.0,
      "step": 2167
    },
    {
      "epoch": 0.6344746853965467,
      "grad_norm": 2.6670784950256348,
      "learning_rate": 3.4138132865086335e-05,
      "loss": 0.3112,
      "step": 2168
    },
    {
      "epoch": 0.6347673397717296,
      "grad_norm": 0.009369021281599998,
      "learning_rate": 3.413081650570676e-05,
      "loss": 0.0002,
      "step": 2169
    },
    {
      "epoch": 0.6350599941469125,
      "grad_norm": 0.004362731706351042,
      "learning_rate": 3.412350014632719e-05,
      "loss": 0.0001,
      "step": 2170
    },
    {
      "epoch": 0.6353526485220954,
      "grad_norm": 0.022487707436084747,
      "learning_rate": 3.411618378694761e-05,
      "loss": 0.0004,
      "step": 2171
    },
    {
      "epoch": 0.6356453028972783,
      "grad_norm": 0.06050792708992958,
      "learning_rate": 3.410886742756804e-05,
      "loss": 0.0009,
      "step": 2172
    },
    {
      "epoch": 0.6359379572724613,
      "grad_norm": 0.05857773497700691,
      "learning_rate": 3.410155106818847e-05,
      "loss": 0.0011,
      "step": 2173
    },
    {
      "epoch": 0.6362306116476442,
      "grad_norm": 0.5926027297973633,
      "learning_rate": 3.4094234708808896e-05,
      "loss": 0.0063,
      "step": 2174
    },
    {
      "epoch": 0.636523266022827,
      "grad_norm": 0.14563141763210297,
      "learning_rate": 3.4086918349429324e-05,
      "loss": 0.0022,
      "step": 2175
    },
    {
      "epoch": 0.6368159203980099,
      "grad_norm": 0.0783274695277214,
      "learning_rate": 3.407960199004975e-05,
      "loss": 0.0015,
      "step": 2176
    },
    {
      "epoch": 0.6371085747731928,
      "grad_norm": 0.1807767003774643,
      "learning_rate": 3.407228563067018e-05,
      "loss": 0.0022,
      "step": 2177
    },
    {
      "epoch": 0.6374012291483757,
      "grad_norm": 0.06697569787502289,
      "learning_rate": 3.406496927129061e-05,
      "loss": 0.0012,
      "step": 2178
    },
    {
      "epoch": 0.6376938835235587,
      "grad_norm": 4.3055644035339355,
      "learning_rate": 3.4057652911911036e-05,
      "loss": 0.0112,
      "step": 2179
    },
    {
      "epoch": 0.6379865378987416,
      "grad_norm": 0.01856839284300804,
      "learning_rate": 3.4050336552531464e-05,
      "loss": 0.0004,
      "step": 2180
    },
    {
      "epoch": 0.6382791922739245,
      "grad_norm": 0.027890879660844803,
      "learning_rate": 3.4043020193151885e-05,
      "loss": 0.0005,
      "step": 2181
    },
    {
      "epoch": 0.6385718466491074,
      "grad_norm": 0.030753012746572495,
      "learning_rate": 3.403570383377231e-05,
      "loss": 0.0006,
      "step": 2182
    },
    {
      "epoch": 0.6388645010242903,
      "grad_norm": 0.01352408342063427,
      "learning_rate": 3.402838747439274e-05,
      "loss": 0.0003,
      "step": 2183
    },
    {
      "epoch": 0.6391571553994733,
      "grad_norm": 0.009226124733686447,
      "learning_rate": 3.402107111501317e-05,
      "loss": 0.0002,
      "step": 2184
    },
    {
      "epoch": 0.6394498097746562,
      "grad_norm": 0.01322910189628601,
      "learning_rate": 3.4013754755633597e-05,
      "loss": 0.0003,
      "step": 2185
    },
    {
      "epoch": 0.639742464149839,
      "grad_norm": 0.015596003271639347,
      "learning_rate": 3.4006438396254024e-05,
      "loss": 0.0004,
      "step": 2186
    },
    {
      "epoch": 0.6400351185250219,
      "grad_norm": 0.006880583707243204,
      "learning_rate": 3.399912203687445e-05,
      "loss": 0.0002,
      "step": 2187
    },
    {
      "epoch": 0.6403277729002048,
      "grad_norm": 0.010393386706709862,
      "learning_rate": 3.399180567749488e-05,
      "loss": 0.0003,
      "step": 2188
    },
    {
      "epoch": 0.6406204272753878,
      "grad_norm": 0.008832590654492378,
      "learning_rate": 3.398448931811531e-05,
      "loss": 0.0002,
      "step": 2189
    },
    {
      "epoch": 0.6409130816505707,
      "grad_norm": 0.02388172410428524,
      "learning_rate": 3.3977172958735736e-05,
      "loss": 0.0003,
      "step": 2190
    },
    {
      "epoch": 0.6412057360257536,
      "grad_norm": 0.04809088632464409,
      "learning_rate": 3.3969856599356164e-05,
      "loss": 0.0004,
      "step": 2191
    },
    {
      "epoch": 0.6414983904009365,
      "grad_norm": 0.003923215437680483,
      "learning_rate": 3.3962540239976585e-05,
      "loss": 0.0001,
      "step": 2192
    },
    {
      "epoch": 0.6417910447761194,
      "grad_norm": 0.007215586956590414,
      "learning_rate": 3.395522388059701e-05,
      "loss": 0.0002,
      "step": 2193
    },
    {
      "epoch": 0.6420836991513024,
      "grad_norm": 0.0031919616740196943,
      "learning_rate": 3.394790752121744e-05,
      "loss": 0.0001,
      "step": 2194
    },
    {
      "epoch": 0.6423763535264853,
      "grad_norm": 14.256848335266113,
      "learning_rate": 3.394059116183787e-05,
      "loss": 0.1273,
      "step": 2195
    },
    {
      "epoch": 0.6426690079016681,
      "grad_norm": 0.0035253202077001333,
      "learning_rate": 3.39332748024583e-05,
      "loss": 0.0001,
      "step": 2196
    },
    {
      "epoch": 0.642961662276851,
      "grad_norm": 1.0613200664520264,
      "learning_rate": 3.3925958443078725e-05,
      "loss": 0.0027,
      "step": 2197
    },
    {
      "epoch": 0.6432543166520339,
      "grad_norm": 0.004639477469027042,
      "learning_rate": 3.391864208369915e-05,
      "loss": 0.0001,
      "step": 2198
    },
    {
      "epoch": 0.6435469710272168,
      "grad_norm": 0.0037832509260624647,
      "learning_rate": 3.391132572431958e-05,
      "loss": 0.0001,
      "step": 2199
    },
    {
      "epoch": 0.6438396254023998,
      "grad_norm": 0.004914381075650454,
      "learning_rate": 3.390400936494001e-05,
      "loss": 0.0001,
      "step": 2200
    },
    {
      "epoch": 0.6441322797775827,
      "grad_norm": 0.01405208557844162,
      "learning_rate": 3.389669300556044e-05,
      "loss": 0.0002,
      "step": 2201
    },
    {
      "epoch": 0.6444249341527656,
      "grad_norm": 0.03063022904098034,
      "learning_rate": 3.388937664618086e-05,
      "loss": 0.0004,
      "step": 2202
    },
    {
      "epoch": 0.6447175885279485,
      "grad_norm": 0.00888260081410408,
      "learning_rate": 3.3882060286801286e-05,
      "loss": 0.0001,
      "step": 2203
    },
    {
      "epoch": 0.6450102429031314,
      "grad_norm": 0.2707194983959198,
      "learning_rate": 3.3874743927421714e-05,
      "loss": 0.0012,
      "step": 2204
    },
    {
      "epoch": 0.6453028972783144,
      "grad_norm": 11.670083045959473,
      "learning_rate": 3.386742756804214e-05,
      "loss": 0.3591,
      "step": 2205
    },
    {
      "epoch": 0.6455955516534972,
      "grad_norm": 0.04171549156308174,
      "learning_rate": 3.386011120866257e-05,
      "loss": 0.0003,
      "step": 2206
    },
    {
      "epoch": 0.6458882060286801,
      "grad_norm": 0.004247634205967188,
      "learning_rate": 3.3852794849283e-05,
      "loss": 0.0001,
      "step": 2207
    },
    {
      "epoch": 0.646180860403863,
      "grad_norm": 0.00270745693705976,
      "learning_rate": 3.3845478489903426e-05,
      "loss": 0.0001,
      "step": 2208
    },
    {
      "epoch": 0.6464735147790459,
      "grad_norm": 0.015967080369591713,
      "learning_rate": 3.3838162130523854e-05,
      "loss": 0.0002,
      "step": 2209
    },
    {
      "epoch": 0.6467661691542289,
      "grad_norm": 0.006121322978287935,
      "learning_rate": 3.383084577114428e-05,
      "loss": 0.0001,
      "step": 2210
    },
    {
      "epoch": 0.6470588235294118,
      "grad_norm": 0.010700903832912445,
      "learning_rate": 3.382352941176471e-05,
      "loss": 0.0002,
      "step": 2211
    },
    {
      "epoch": 0.6473514779045947,
      "grad_norm": 5.404544353485107,
      "learning_rate": 3.381621305238514e-05,
      "loss": 0.1908,
      "step": 2212
    },
    {
      "epoch": 0.6476441322797776,
      "grad_norm": 5.264749526977539,
      "learning_rate": 3.380889669300556e-05,
      "loss": 0.0133,
      "step": 2213
    },
    {
      "epoch": 0.6479367866549605,
      "grad_norm": 0.004720195196568966,
      "learning_rate": 3.380158033362599e-05,
      "loss": 0.0001,
      "step": 2214
    },
    {
      "epoch": 0.6482294410301433,
      "grad_norm": 0.006057473830878735,
      "learning_rate": 3.3794263974246415e-05,
      "loss": 0.0001,
      "step": 2215
    },
    {
      "epoch": 0.6485220954053263,
      "grad_norm": 0.003476094687357545,
      "learning_rate": 3.378694761486684e-05,
      "loss": 0.0001,
      "step": 2216
    },
    {
      "epoch": 0.6488147497805092,
      "grad_norm": 0.0119353411719203,
      "learning_rate": 3.377963125548727e-05,
      "loss": 0.0002,
      "step": 2217
    },
    {
      "epoch": 0.6491074041556921,
      "grad_norm": 0.004302394576370716,
      "learning_rate": 3.37723148961077e-05,
      "loss": 0.0001,
      "step": 2218
    },
    {
      "epoch": 0.649400058530875,
      "grad_norm": 0.0031735720112919807,
      "learning_rate": 3.3764998536728127e-05,
      "loss": 0.0001,
      "step": 2219
    },
    {
      "epoch": 0.6496927129060579,
      "grad_norm": 0.003988498356193304,
      "learning_rate": 3.3757682177348554e-05,
      "loss": 0.0001,
      "step": 2220
    },
    {
      "epoch": 0.6499853672812409,
      "grad_norm": 0.004030546173453331,
      "learning_rate": 3.375036581796898e-05,
      "loss": 0.0001,
      "step": 2221
    },
    {
      "epoch": 0.6502780216564238,
      "grad_norm": 0.011398413218557835,
      "learning_rate": 3.374304945858941e-05,
      "loss": 0.0003,
      "step": 2222
    },
    {
      "epoch": 0.6505706760316067,
      "grad_norm": 0.011405447497963905,
      "learning_rate": 3.373573309920984e-05,
      "loss": 0.0002,
      "step": 2223
    },
    {
      "epoch": 0.6508633304067896,
      "grad_norm": 0.01702878251671791,
      "learning_rate": 3.372841673983026e-05,
      "loss": 0.0003,
      "step": 2224
    },
    {
      "epoch": 0.6511559847819725,
      "grad_norm": 11.716462135314941,
      "learning_rate": 3.372110038045069e-05,
      "loss": 0.0334,
      "step": 2225
    },
    {
      "epoch": 0.6514486391571555,
      "grad_norm": 0.019067659974098206,
      "learning_rate": 3.3713784021071115e-05,
      "loss": 0.0002,
      "step": 2226
    },
    {
      "epoch": 0.6517412935323383,
      "grad_norm": 0.044808294624090195,
      "learning_rate": 3.370646766169154e-05,
      "loss": 0.0005,
      "step": 2227
    },
    {
      "epoch": 0.6520339479075212,
      "grad_norm": 0.1213802695274353,
      "learning_rate": 3.369915130231197e-05,
      "loss": 0.0007,
      "step": 2228
    },
    {
      "epoch": 0.6523266022827041,
      "grad_norm": 0.00575533602386713,
      "learning_rate": 3.36918349429324e-05,
      "loss": 0.0002,
      "step": 2229
    },
    {
      "epoch": 0.652619256657887,
      "grad_norm": 0.07758716493844986,
      "learning_rate": 3.368451858355283e-05,
      "loss": 0.0007,
      "step": 2230
    },
    {
      "epoch": 0.6529119110330699,
      "grad_norm": 0.0531279556453228,
      "learning_rate": 3.3677202224173255e-05,
      "loss": 0.0005,
      "step": 2231
    },
    {
      "epoch": 0.6532045654082529,
      "grad_norm": 0.0054940213449299335,
      "learning_rate": 3.366988586479368e-05,
      "loss": 0.0001,
      "step": 2232
    },
    {
      "epoch": 0.6534972197834358,
      "grad_norm": 0.09491506218910217,
      "learning_rate": 3.366256950541411e-05,
      "loss": 0.0005,
      "step": 2233
    },
    {
      "epoch": 0.6537898741586187,
      "grad_norm": 0.00495274318382144,
      "learning_rate": 3.365525314603453e-05,
      "loss": 0.0001,
      "step": 2234
    },
    {
      "epoch": 0.6540825285338016,
      "grad_norm": 0.007331415545195341,
      "learning_rate": 3.364793678665496e-05,
      "loss": 0.0001,
      "step": 2235
    },
    {
      "epoch": 0.6543751829089844,
      "grad_norm": 0.008214449509978294,
      "learning_rate": 3.364062042727539e-05,
      "loss": 0.0002,
      "step": 2236
    },
    {
      "epoch": 0.6546678372841674,
      "grad_norm": 0.004883881658315659,
      "learning_rate": 3.3633304067895816e-05,
      "loss": 0.0001,
      "step": 2237
    },
    {
      "epoch": 0.6549604916593503,
      "grad_norm": 0.00528697669506073,
      "learning_rate": 3.3625987708516244e-05,
      "loss": 0.0001,
      "step": 2238
    },
    {
      "epoch": 0.6552531460345332,
      "grad_norm": 5.757792949676514,
      "learning_rate": 3.361867134913667e-05,
      "loss": 0.0566,
      "step": 2239
    },
    {
      "epoch": 0.6555458004097161,
      "grad_norm": 0.1155574768781662,
      "learning_rate": 3.36113549897571e-05,
      "loss": 0.001,
      "step": 2240
    },
    {
      "epoch": 0.655838454784899,
      "grad_norm": 0.003949552774429321,
      "learning_rate": 3.360403863037753e-05,
      "loss": 0.0001,
      "step": 2241
    },
    {
      "epoch": 0.656131109160082,
      "grad_norm": 0.005053091794252396,
      "learning_rate": 3.3596722270997956e-05,
      "loss": 0.0001,
      "step": 2242
    },
    {
      "epoch": 0.6564237635352649,
      "grad_norm": 0.0025514643639326096,
      "learning_rate": 3.3589405911618384e-05,
      "loss": 0.0001,
      "step": 2243
    },
    {
      "epoch": 0.6567164179104478,
      "grad_norm": 0.0027982911560684443,
      "learning_rate": 3.358208955223881e-05,
      "loss": 0.0001,
      "step": 2244
    },
    {
      "epoch": 0.6570090722856307,
      "grad_norm": 0.13875851035118103,
      "learning_rate": 3.357477319285923e-05,
      "loss": 0.0006,
      "step": 2245
    },
    {
      "epoch": 0.6573017266608135,
      "grad_norm": 0.002096978249028325,
      "learning_rate": 3.356745683347966e-05,
      "loss": 0.0001,
      "step": 2246
    },
    {
      "epoch": 0.6575943810359964,
      "grad_norm": 0.6293127536773682,
      "learning_rate": 3.356014047410009e-05,
      "loss": 0.0022,
      "step": 2247
    },
    {
      "epoch": 0.6578870354111794,
      "grad_norm": 0.001957373693585396,
      "learning_rate": 3.355282411472052e-05,
      "loss": 0.0001,
      "step": 2248
    },
    {
      "epoch": 0.6581796897863623,
      "grad_norm": 0.03885246813297272,
      "learning_rate": 3.3545507755340945e-05,
      "loss": 0.0003,
      "step": 2249
    },
    {
      "epoch": 0.6584723441615452,
      "grad_norm": 0.00180476950481534,
      "learning_rate": 3.353819139596137e-05,
      "loss": 0.0,
      "step": 2250
    },
    {
      "epoch": 0.6587649985367281,
      "grad_norm": 0.008187128230929375,
      "learning_rate": 3.35308750365818e-05,
      "loss": 0.0002,
      "step": 2251
    },
    {
      "epoch": 0.659057652911911,
      "grad_norm": 0.0025216848589479923,
      "learning_rate": 3.352355867720223e-05,
      "loss": 0.0001,
      "step": 2252
    },
    {
      "epoch": 0.659350307287094,
      "grad_norm": 0.0019639465026557446,
      "learning_rate": 3.3516242317822657e-05,
      "loss": 0.0001,
      "step": 2253
    },
    {
      "epoch": 0.6596429616622769,
      "grad_norm": 0.04376494884490967,
      "learning_rate": 3.3508925958443084e-05,
      "loss": 0.0002,
      "step": 2254
    },
    {
      "epoch": 0.6599356160374598,
      "grad_norm": 0.38673028349876404,
      "learning_rate": 3.350160959906351e-05,
      "loss": 0.001,
      "step": 2255
    },
    {
      "epoch": 0.6602282704126426,
      "grad_norm": 0.005738749168813229,
      "learning_rate": 3.3494293239683934e-05,
      "loss": 0.0001,
      "step": 2256
    },
    {
      "epoch": 0.6605209247878255,
      "grad_norm": 2.2870633602142334,
      "learning_rate": 3.348697688030436e-05,
      "loss": 0.0064,
      "step": 2257
    },
    {
      "epoch": 0.6608135791630085,
      "grad_norm": 0.0019669472239911556,
      "learning_rate": 3.347966052092479e-05,
      "loss": 0.0,
      "step": 2258
    },
    {
      "epoch": 0.6611062335381914,
      "grad_norm": 0.002588227391242981,
      "learning_rate": 3.347234416154522e-05,
      "loss": 0.0001,
      "step": 2259
    },
    {
      "epoch": 0.6613988879133743,
      "grad_norm": 0.001294539077207446,
      "learning_rate": 3.3465027802165645e-05,
      "loss": 0.0,
      "step": 2260
    },
    {
      "epoch": 0.6616915422885572,
      "grad_norm": 0.00048353031161241233,
      "learning_rate": 3.345771144278607e-05,
      "loss": 0.0,
      "step": 2261
    },
    {
      "epoch": 0.6619841966637401,
      "grad_norm": 0.0015470280777662992,
      "learning_rate": 3.34503950834065e-05,
      "loss": 0.0,
      "step": 2262
    },
    {
      "epoch": 0.6622768510389231,
      "grad_norm": 4.904445171356201,
      "learning_rate": 3.344307872402693e-05,
      "loss": 0.011,
      "step": 2263
    },
    {
      "epoch": 0.662569505414106,
      "grad_norm": 2.363774538040161,
      "learning_rate": 3.343576236464736e-05,
      "loss": 0.0055,
      "step": 2264
    },
    {
      "epoch": 0.6628621597892889,
      "grad_norm": 0.0030767517164349556,
      "learning_rate": 3.3428446005267785e-05,
      "loss": 0.0001,
      "step": 2265
    },
    {
      "epoch": 0.6631548141644718,
      "grad_norm": 0.008060317486524582,
      "learning_rate": 3.3421129645888206e-05,
      "loss": 0.0001,
      "step": 2266
    },
    {
      "epoch": 0.6634474685396546,
      "grad_norm": 0.03407042846083641,
      "learning_rate": 3.3413813286508634e-05,
      "loss": 0.0002,
      "step": 2267
    },
    {
      "epoch": 0.6637401229148375,
      "grad_norm": 0.0033131930977106094,
      "learning_rate": 3.340649692712906e-05,
      "loss": 0.0001,
      "step": 2268
    },
    {
      "epoch": 0.6640327772900205,
      "grad_norm": 0.007835319265723228,
      "learning_rate": 3.339918056774949e-05,
      "loss": 0.0001,
      "step": 2269
    },
    {
      "epoch": 0.6643254316652034,
      "grad_norm": 0.001285523409023881,
      "learning_rate": 3.339186420836992e-05,
      "loss": 0.0,
      "step": 2270
    },
    {
      "epoch": 0.6646180860403863,
      "grad_norm": 0.010365760885179043,
      "learning_rate": 3.3384547848990346e-05,
      "loss": 0.0001,
      "step": 2271
    },
    {
      "epoch": 0.6649107404155692,
      "grad_norm": 0.0191858671605587,
      "learning_rate": 3.3377231489610774e-05,
      "loss": 0.0001,
      "step": 2272
    },
    {
      "epoch": 0.6652033947907521,
      "grad_norm": 15.162426948547363,
      "learning_rate": 3.33699151302312e-05,
      "loss": 0.0439,
      "step": 2273
    },
    {
      "epoch": 0.6654960491659351,
      "grad_norm": 0.0013493781443685293,
      "learning_rate": 3.336259877085163e-05,
      "loss": 0.0,
      "step": 2274
    },
    {
      "epoch": 0.665788703541118,
      "grad_norm": 0.0016791290836408734,
      "learning_rate": 3.335528241147206e-05,
      "loss": 0.0,
      "step": 2275
    },
    {
      "epoch": 0.6660813579163009,
      "grad_norm": 0.001705312286503613,
      "learning_rate": 3.3347966052092486e-05,
      "loss": 0.0,
      "step": 2276
    },
    {
      "epoch": 0.6663740122914837,
      "grad_norm": 0.001440341817215085,
      "learning_rate": 3.334064969271291e-05,
      "loss": 0.0,
      "step": 2277
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 2.711886167526245,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.0065,
      "step": 2278
    },
    {
      "epoch": 0.6669593210418496,
      "grad_norm": 0.00304962950758636,
      "learning_rate": 3.332601697395376e-05,
      "loss": 0.0001,
      "step": 2279
    },
    {
      "epoch": 0.6672519754170325,
      "grad_norm": 5.063312530517578,
      "learning_rate": 3.331870061457419e-05,
      "loss": 0.2016,
      "step": 2280
    },
    {
      "epoch": 0.6675446297922154,
      "grad_norm": 0.013917754404246807,
      "learning_rate": 3.331138425519462e-05,
      "loss": 0.0001,
      "step": 2281
    },
    {
      "epoch": 0.6678372841673983,
      "grad_norm": 0.0017429639119654894,
      "learning_rate": 3.330406789581505e-05,
      "loss": 0.0,
      "step": 2282
    },
    {
      "epoch": 0.6681299385425812,
      "grad_norm": 0.0008035599021241069,
      "learning_rate": 3.3296751536435475e-05,
      "loss": 0.0,
      "step": 2283
    },
    {
      "epoch": 0.6684225929177641,
      "grad_norm": 0.0014676072169095278,
      "learning_rate": 3.32894351770559e-05,
      "loss": 0.0,
      "step": 2284
    },
    {
      "epoch": 0.6687152472929471,
      "grad_norm": 0.000980646931566298,
      "learning_rate": 3.328211881767633e-05,
      "loss": 0.0,
      "step": 2285
    },
    {
      "epoch": 0.66900790166813,
      "grad_norm": 5.8885579109191895,
      "learning_rate": 3.327480245829676e-05,
      "loss": 0.0173,
      "step": 2286
    },
    {
      "epoch": 0.6693005560433128,
      "grad_norm": 0.0011138279223814607,
      "learning_rate": 3.326748609891718e-05,
      "loss": 0.0,
      "step": 2287
    },
    {
      "epoch": 0.6695932104184957,
      "grad_norm": 1.5539418458938599,
      "learning_rate": 3.326016973953761e-05,
      "loss": 0.0044,
      "step": 2288
    },
    {
      "epoch": 0.6698858647936786,
      "grad_norm": 0.0023576596286147833,
      "learning_rate": 3.3252853380158036e-05,
      "loss": 0.0001,
      "step": 2289
    },
    {
      "epoch": 0.6701785191688616,
      "grad_norm": 0.00780422193929553,
      "learning_rate": 3.3245537020778464e-05,
      "loss": 0.0001,
      "step": 2290
    },
    {
      "epoch": 0.6704711735440445,
      "grad_norm": 0.004599100910127163,
      "learning_rate": 3.323822066139889e-05,
      "loss": 0.0001,
      "step": 2291
    },
    {
      "epoch": 0.6707638279192274,
      "grad_norm": 0.0034668464213609695,
      "learning_rate": 3.323090430201932e-05,
      "loss": 0.0001,
      "step": 2292
    },
    {
      "epoch": 0.6710564822944103,
      "grad_norm": 0.155696839094162,
      "learning_rate": 3.322358794263975e-05,
      "loss": 0.0009,
      "step": 2293
    },
    {
      "epoch": 0.6713491366695932,
      "grad_norm": 0.0032399471383541822,
      "learning_rate": 3.3216271583260175e-05,
      "loss": 0.0001,
      "step": 2294
    },
    {
      "epoch": 0.6716417910447762,
      "grad_norm": 2.8677914142608643,
      "learning_rate": 3.32089552238806e-05,
      "loss": 0.0065,
      "step": 2295
    },
    {
      "epoch": 0.6719344454199591,
      "grad_norm": 0.001276138355024159,
      "learning_rate": 3.320163886450103e-05,
      "loss": 0.0,
      "step": 2296
    },
    {
      "epoch": 0.672227099795142,
      "grad_norm": 0.0016609752783551812,
      "learning_rate": 3.319432250512145e-05,
      "loss": 0.0,
      "step": 2297
    },
    {
      "epoch": 0.6725197541703248,
      "grad_norm": 0.0012457565171644092,
      "learning_rate": 3.318700614574188e-05,
      "loss": 0.0,
      "step": 2298
    },
    {
      "epoch": 0.6728124085455077,
      "grad_norm": 0.000976709881797433,
      "learning_rate": 3.317968978636231e-05,
      "loss": 0.0,
      "step": 2299
    },
    {
      "epoch": 0.6731050629206906,
      "grad_norm": 0.00264275586232543,
      "learning_rate": 3.3172373426982736e-05,
      "loss": 0.0001,
      "step": 2300
    },
    {
      "epoch": 0.6733977172958736,
      "grad_norm": 0.0018932694802060723,
      "learning_rate": 3.3165057067603164e-05,
      "loss": 0.0,
      "step": 2301
    },
    {
      "epoch": 0.6736903716710565,
      "grad_norm": 0.006679253187030554,
      "learning_rate": 3.315774070822359e-05,
      "loss": 0.0001,
      "step": 2302
    },
    {
      "epoch": 0.6739830260462394,
      "grad_norm": 0.0025321717839688063,
      "learning_rate": 3.315042434884402e-05,
      "loss": 0.0001,
      "step": 2303
    },
    {
      "epoch": 0.6742756804214223,
      "grad_norm": 0.00163306575268507,
      "learning_rate": 3.314310798946445e-05,
      "loss": 0.0,
      "step": 2304
    },
    {
      "epoch": 0.6745683347966052,
      "grad_norm": 0.0017003427492454648,
      "learning_rate": 3.313579163008487e-05,
      "loss": 0.0,
      "step": 2305
    },
    {
      "epoch": 0.6748609891717882,
      "grad_norm": 0.0021983168553560972,
      "learning_rate": 3.31284752707053e-05,
      "loss": 0.0001,
      "step": 2306
    },
    {
      "epoch": 0.675153643546971,
      "grad_norm": 0.0022590961307287216,
      "learning_rate": 3.3121158911325725e-05,
      "loss": 0.0,
      "step": 2307
    },
    {
      "epoch": 0.6754462979221539,
      "grad_norm": 0.00828596856445074,
      "learning_rate": 3.311384255194615e-05,
      "loss": 0.0001,
      "step": 2308
    },
    {
      "epoch": 0.6757389522973368,
      "grad_norm": 0.006290559656918049,
      "learning_rate": 3.310652619256658e-05,
      "loss": 0.0001,
      "step": 2309
    },
    {
      "epoch": 0.6760316066725197,
      "grad_norm": 0.0021867703180760145,
      "learning_rate": 3.309920983318701e-05,
      "loss": 0.0,
      "step": 2310
    },
    {
      "epoch": 0.6763242610477027,
      "grad_norm": 0.001774878241121769,
      "learning_rate": 3.309189347380744e-05,
      "loss": 0.0,
      "step": 2311
    },
    {
      "epoch": 0.6766169154228856,
      "grad_norm": 10.279358863830566,
      "learning_rate": 3.3084577114427865e-05,
      "loss": 0.077,
      "step": 2312
    },
    {
      "epoch": 0.6769095697980685,
      "grad_norm": 0.002121191006153822,
      "learning_rate": 3.3077260755048286e-05,
      "loss": 0.0,
      "step": 2313
    },
    {
      "epoch": 0.6772022241732514,
      "grad_norm": 2.7408127784729004,
      "learning_rate": 3.3069944395668714e-05,
      "loss": 0.234,
      "step": 2314
    },
    {
      "epoch": 0.6774948785484343,
      "grad_norm": 0.0018349678721278906,
      "learning_rate": 3.306262803628914e-05,
      "loss": 0.0,
      "step": 2315
    },
    {
      "epoch": 0.6777875329236172,
      "grad_norm": 4.3838348388671875,
      "learning_rate": 3.305531167690957e-05,
      "loss": 0.1928,
      "step": 2316
    },
    {
      "epoch": 0.6780801872988002,
      "grad_norm": 0.0037601948715746403,
      "learning_rate": 3.304799531753e-05,
      "loss": 0.0001,
      "step": 2317
    },
    {
      "epoch": 0.678372841673983,
      "grad_norm": 0.06992469727993011,
      "learning_rate": 3.3040678958150426e-05,
      "loss": 0.0008,
      "step": 2318
    },
    {
      "epoch": 0.6786654960491659,
      "grad_norm": 20.966833114624023,
      "learning_rate": 3.3033362598770854e-05,
      "loss": 0.0917,
      "step": 2319
    },
    {
      "epoch": 0.6789581504243488,
      "grad_norm": 0.028865372762084007,
      "learning_rate": 3.302604623939128e-05,
      "loss": 0.0005,
      "step": 2320
    },
    {
      "epoch": 0.6792508047995317,
      "grad_norm": 0.012698938138782978,
      "learning_rate": 3.30187298800117e-05,
      "loss": 0.0003,
      "step": 2321
    },
    {
      "epoch": 0.6795434591747147,
      "grad_norm": 0.006503901444375515,
      "learning_rate": 3.301141352063213e-05,
      "loss": 0.0001,
      "step": 2322
    },
    {
      "epoch": 0.6798361135498976,
      "grad_norm": 0.12220772355794907,
      "learning_rate": 3.300409716125256e-05,
      "loss": 0.0017,
      "step": 2323
    },
    {
      "epoch": 0.6801287679250805,
      "grad_norm": 0.011729221791028976,
      "learning_rate": 3.299678080187299e-05,
      "loss": 0.0002,
      "step": 2324
    },
    {
      "epoch": 0.6804214223002634,
      "grad_norm": 0.07524167001247406,
      "learning_rate": 3.2989464442493415e-05,
      "loss": 0.0013,
      "step": 2325
    },
    {
      "epoch": 0.6807140766754463,
      "grad_norm": 2.473724842071533,
      "learning_rate": 3.298214808311384e-05,
      "loss": 0.0126,
      "step": 2326
    },
    {
      "epoch": 0.6810067310506293,
      "grad_norm": 0.01172594353556633,
      "learning_rate": 3.297483172373427e-05,
      "loss": 0.0002,
      "step": 2327
    },
    {
      "epoch": 0.6812993854258121,
      "grad_norm": 0.4100677967071533,
      "learning_rate": 3.29675153643547e-05,
      "loss": 0.0046,
      "step": 2328
    },
    {
      "epoch": 0.681592039800995,
      "grad_norm": 0.0013916159514337778,
      "learning_rate": 3.2960199004975127e-05,
      "loss": 0.0,
      "step": 2329
    },
    {
      "epoch": 0.6818846941761779,
      "grad_norm": 0.022183818742632866,
      "learning_rate": 3.295288264559555e-05,
      "loss": 0.0004,
      "step": 2330
    },
    {
      "epoch": 0.6821773485513608,
      "grad_norm": 0.014194161631166935,
      "learning_rate": 3.2945566286215976e-05,
      "loss": 0.0003,
      "step": 2331
    },
    {
      "epoch": 0.6824700029265438,
      "grad_norm": 0.010851086117327213,
      "learning_rate": 3.2938249926836404e-05,
      "loss": 0.0003,
      "step": 2332
    },
    {
      "epoch": 0.6827626573017267,
      "grad_norm": 0.004293947480618954,
      "learning_rate": 3.293093356745683e-05,
      "loss": 0.0001,
      "step": 2333
    },
    {
      "epoch": 0.6830553116769096,
      "grad_norm": 0.009280310943722725,
      "learning_rate": 3.292361720807726e-05,
      "loss": 0.0002,
      "step": 2334
    },
    {
      "epoch": 0.6833479660520925,
      "grad_norm": 0.008533764630556107,
      "learning_rate": 3.291630084869769e-05,
      "loss": 0.0002,
      "step": 2335
    },
    {
      "epoch": 0.6836406204272754,
      "grad_norm": 0.006480607204139233,
      "learning_rate": 3.2908984489318115e-05,
      "loss": 0.0002,
      "step": 2336
    },
    {
      "epoch": 0.6839332748024582,
      "grad_norm": 0.011746611446142197,
      "learning_rate": 3.290166812993854e-05,
      "loss": 0.0003,
      "step": 2337
    },
    {
      "epoch": 0.6842259291776412,
      "grad_norm": 0.010013306513428688,
      "learning_rate": 3.289435177055897e-05,
      "loss": 0.0002,
      "step": 2338
    },
    {
      "epoch": 0.6845185835528241,
      "grad_norm": 0.012896952219307423,
      "learning_rate": 3.28870354111794e-05,
      "loss": 0.0003,
      "step": 2339
    },
    {
      "epoch": 0.684811237928007,
      "grad_norm": 0.00540511030703783,
      "learning_rate": 3.287971905179982e-05,
      "loss": 0.0002,
      "step": 2340
    },
    {
      "epoch": 0.6851038923031899,
      "grad_norm": 5.056729316711426,
      "learning_rate": 3.287240269242025e-05,
      "loss": 0.069,
      "step": 2341
    },
    {
      "epoch": 0.6853965466783728,
      "grad_norm": 0.05942735821008682,
      "learning_rate": 3.2865086333040676e-05,
      "loss": 0.0005,
      "step": 2342
    },
    {
      "epoch": 0.6856892010535558,
      "grad_norm": 0.01774713769555092,
      "learning_rate": 3.2857769973661104e-05,
      "loss": 0.0004,
      "step": 2343
    },
    {
      "epoch": 0.6859818554287387,
      "grad_norm": 0.14372679591178894,
      "learning_rate": 3.285045361428153e-05,
      "loss": 0.0009,
      "step": 2344
    },
    {
      "epoch": 0.6862745098039216,
      "grad_norm": 0.0036756719928234816,
      "learning_rate": 3.284313725490196e-05,
      "loss": 0.0001,
      "step": 2345
    },
    {
      "epoch": 0.6865671641791045,
      "grad_norm": 0.0028616893105208874,
      "learning_rate": 3.283582089552239e-05,
      "loss": 0.0001,
      "step": 2346
    },
    {
      "epoch": 0.6868598185542873,
      "grad_norm": 0.00771837355569005,
      "learning_rate": 3.2828504536142816e-05,
      "loss": 0.0002,
      "step": 2347
    },
    {
      "epoch": 0.6871524729294703,
      "grad_norm": 5.727898120880127,
      "learning_rate": 3.2821188176763244e-05,
      "loss": 0.0814,
      "step": 2348
    },
    {
      "epoch": 0.6874451273046532,
      "grad_norm": 0.005702680442482233,
      "learning_rate": 3.281387181738367e-05,
      "loss": 0.0001,
      "step": 2349
    },
    {
      "epoch": 0.6877377816798361,
      "grad_norm": 0.008673261851072311,
      "learning_rate": 3.28065554580041e-05,
      "loss": 0.0002,
      "step": 2350
    },
    {
      "epoch": 0.688030436055019,
      "grad_norm": 0.011376095935702324,
      "learning_rate": 3.279923909862452e-05,
      "loss": 0.0002,
      "step": 2351
    },
    {
      "epoch": 0.6883230904302019,
      "grad_norm": 0.021226489916443825,
      "learning_rate": 3.279192273924495e-05,
      "loss": 0.0003,
      "step": 2352
    },
    {
      "epoch": 0.6886157448053848,
      "grad_norm": 0.05789355933666229,
      "learning_rate": 3.278460637986538e-05,
      "loss": 0.001,
      "step": 2353
    },
    {
      "epoch": 0.6889083991805678,
      "grad_norm": 0.18417315185070038,
      "learning_rate": 3.2777290020485805e-05,
      "loss": 0.0012,
      "step": 2354
    },
    {
      "epoch": 0.6892010535557507,
      "grad_norm": 0.016051622107625008,
      "learning_rate": 3.276997366110623e-05,
      "loss": 0.0003,
      "step": 2355
    },
    {
      "epoch": 0.6894937079309336,
      "grad_norm": 0.040032170712947845,
      "learning_rate": 3.276265730172666e-05,
      "loss": 0.0005,
      "step": 2356
    },
    {
      "epoch": 0.6897863623061165,
      "grad_norm": 14.222600936889648,
      "learning_rate": 3.275534094234709e-05,
      "loss": 0.1511,
      "step": 2357
    },
    {
      "epoch": 0.6900790166812993,
      "grad_norm": 0.10259919613599777,
      "learning_rate": 3.274802458296752e-05,
      "loss": 0.0014,
      "step": 2358
    },
    {
      "epoch": 0.6903716710564823,
      "grad_norm": 0.026048608124256134,
      "learning_rate": 3.2740708223587945e-05,
      "loss": 0.0004,
      "step": 2359
    },
    {
      "epoch": 0.6906643254316652,
      "grad_norm": 0.031028462573885918,
      "learning_rate": 3.273339186420837e-05,
      "loss": 0.0006,
      "step": 2360
    },
    {
      "epoch": 0.6909569798068481,
      "grad_norm": 0.018124297261238098,
      "learning_rate": 3.27260755048288e-05,
      "loss": 0.0003,
      "step": 2361
    },
    {
      "epoch": 0.691249634182031,
      "grad_norm": 0.00908717978745699,
      "learning_rate": 3.271875914544922e-05,
      "loss": 0.0002,
      "step": 2362
    },
    {
      "epoch": 0.6915422885572139,
      "grad_norm": 0.004855715204030275,
      "learning_rate": 3.271144278606965e-05,
      "loss": 0.0001,
      "step": 2363
    },
    {
      "epoch": 0.6918349429323969,
      "grad_norm": 0.02341291308403015,
      "learning_rate": 3.270412642669008e-05,
      "loss": 0.0004,
      "step": 2364
    },
    {
      "epoch": 0.6921275973075798,
      "grad_norm": 0.05145770311355591,
      "learning_rate": 3.2696810067310506e-05,
      "loss": 0.0007,
      "step": 2365
    },
    {
      "epoch": 0.6924202516827627,
      "grad_norm": 0.027379680424928665,
      "learning_rate": 3.2689493707930934e-05,
      "loss": 0.0004,
      "step": 2366
    },
    {
      "epoch": 0.6927129060579456,
      "grad_norm": 0.010610656812787056,
      "learning_rate": 3.268217734855136e-05,
      "loss": 0.0002,
      "step": 2367
    },
    {
      "epoch": 0.6930055604331284,
      "grad_norm": 0.06512395292520523,
      "learning_rate": 3.267486098917179e-05,
      "loss": 0.0005,
      "step": 2368
    },
    {
      "epoch": 0.6932982148083113,
      "grad_norm": 0.004508286714553833,
      "learning_rate": 3.266754462979222e-05,
      "loss": 0.0001,
      "step": 2369
    },
    {
      "epoch": 0.6935908691834943,
      "grad_norm": 0.04418289661407471,
      "learning_rate": 3.2660228270412645e-05,
      "loss": 0.0004,
      "step": 2370
    },
    {
      "epoch": 0.6938835235586772,
      "grad_norm": 0.04784709960222244,
      "learning_rate": 3.265291191103307e-05,
      "loss": 0.0002,
      "step": 2371
    },
    {
      "epoch": 0.6941761779338601,
      "grad_norm": 5.457385540008545,
      "learning_rate": 3.2645595551653494e-05,
      "loss": 0.0266,
      "step": 2372
    },
    {
      "epoch": 0.694468832309043,
      "grad_norm": 0.004906444810330868,
      "learning_rate": 3.263827919227392e-05,
      "loss": 0.0001,
      "step": 2373
    },
    {
      "epoch": 0.6947614866842259,
      "grad_norm": 0.0034356743562966585,
      "learning_rate": 3.263096283289435e-05,
      "loss": 0.0001,
      "step": 2374
    },
    {
      "epoch": 0.6950541410594089,
      "grad_norm": 0.0035204212181270123,
      "learning_rate": 3.262364647351478e-05,
      "loss": 0.0001,
      "step": 2375
    },
    {
      "epoch": 0.6953467954345918,
      "grad_norm": 0.006286072079092264,
      "learning_rate": 3.2616330114135206e-05,
      "loss": 0.0001,
      "step": 2376
    },
    {
      "epoch": 0.6956394498097747,
      "grad_norm": 0.021691780537366867,
      "learning_rate": 3.2609013754755634e-05,
      "loss": 0.0002,
      "step": 2377
    },
    {
      "epoch": 0.6959321041849575,
      "grad_norm": 0.0023452823515981436,
      "learning_rate": 3.260169739537606e-05,
      "loss": 0.0001,
      "step": 2378
    },
    {
      "epoch": 0.6962247585601404,
      "grad_norm": 0.0005349525599740446,
      "learning_rate": 3.259438103599649e-05,
      "loss": 0.0,
      "step": 2379
    },
    {
      "epoch": 0.6965174129353234,
      "grad_norm": 0.01140750851482153,
      "learning_rate": 3.258706467661692e-05,
      "loss": 0.0002,
      "step": 2380
    },
    {
      "epoch": 0.6968100673105063,
      "grad_norm": 5.882567882537842,
      "learning_rate": 3.2579748317237346e-05,
      "loss": 0.0526,
      "step": 2381
    },
    {
      "epoch": 0.6971027216856892,
      "grad_norm": 0.05859658122062683,
      "learning_rate": 3.2572431957857774e-05,
      "loss": 0.0006,
      "step": 2382
    },
    {
      "epoch": 0.6973953760608721,
      "grad_norm": 0.03432149067521095,
      "learning_rate": 3.2565115598478195e-05,
      "loss": 0.0004,
      "step": 2383
    },
    {
      "epoch": 0.697688030436055,
      "grad_norm": 2.2817699909210205,
      "learning_rate": 3.255779923909862e-05,
      "loss": 0.0092,
      "step": 2384
    },
    {
      "epoch": 0.6979806848112379,
      "grad_norm": 3.7983312606811523,
      "learning_rate": 3.255048287971905e-05,
      "loss": 0.2313,
      "step": 2385
    },
    {
      "epoch": 0.6982733391864209,
      "grad_norm": 11.28543472290039,
      "learning_rate": 3.254316652033948e-05,
      "loss": 0.1115,
      "step": 2386
    },
    {
      "epoch": 0.6985659935616038,
      "grad_norm": 0.011281290091574192,
      "learning_rate": 3.253585016095991e-05,
      "loss": 0.0002,
      "step": 2387
    },
    {
      "epoch": 0.6988586479367866,
      "grad_norm": 0.016820495948195457,
      "learning_rate": 3.2528533801580335e-05,
      "loss": 0.0002,
      "step": 2388
    },
    {
      "epoch": 0.6991513023119695,
      "grad_norm": 0.07670903205871582,
      "learning_rate": 3.252121744220076e-05,
      "loss": 0.0007,
      "step": 2389
    },
    {
      "epoch": 0.6994439566871524,
      "grad_norm": 2.261282205581665,
      "learning_rate": 3.251390108282119e-05,
      "loss": 0.009,
      "step": 2390
    },
    {
      "epoch": 0.6997366110623354,
      "grad_norm": 6.105201244354248,
      "learning_rate": 3.250658472344162e-05,
      "loss": 0.057,
      "step": 2391
    },
    {
      "epoch": 0.7000292654375183,
      "grad_norm": 0.01005995087325573,
      "learning_rate": 3.249926836406205e-05,
      "loss": 0.0002,
      "step": 2392
    },
    {
      "epoch": 0.7003219198127012,
      "grad_norm": 5.082892894744873,
      "learning_rate": 3.249195200468247e-05,
      "loss": 0.0362,
      "step": 2393
    },
    {
      "epoch": 0.7006145741878841,
      "grad_norm": 0.10898295044898987,
      "learning_rate": 3.2484635645302896e-05,
      "loss": 0.0009,
      "step": 2394
    },
    {
      "epoch": 0.700907228563067,
      "grad_norm": 0.008598077110946178,
      "learning_rate": 3.2477319285923324e-05,
      "loss": 0.0002,
      "step": 2395
    },
    {
      "epoch": 0.70119988293825,
      "grad_norm": 0.0072875553742051125,
      "learning_rate": 3.247000292654375e-05,
      "loss": 0.0002,
      "step": 2396
    },
    {
      "epoch": 0.7014925373134329,
      "grad_norm": 0.01747208461165428,
      "learning_rate": 3.246268656716418e-05,
      "loss": 0.0003,
      "step": 2397
    },
    {
      "epoch": 0.7017851916886158,
      "grad_norm": 0.06051646173000336,
      "learning_rate": 3.245537020778461e-05,
      "loss": 0.0007,
      "step": 2398
    },
    {
      "epoch": 0.7020778460637986,
      "grad_norm": 1.8474124670028687,
      "learning_rate": 3.2448053848405036e-05,
      "loss": 0.0113,
      "step": 2399
    },
    {
      "epoch": 0.7023705004389815,
      "grad_norm": 0.006191436201334,
      "learning_rate": 3.2440737489025464e-05,
      "loss": 0.0001,
      "step": 2400
    },
    {
      "epoch": 0.7026631548141645,
      "grad_norm": 0.22403065860271454,
      "learning_rate": 3.243342112964589e-05,
      "loss": 0.0017,
      "step": 2401
    },
    {
      "epoch": 0.7029558091893474,
      "grad_norm": 0.09544986486434937,
      "learning_rate": 3.242610477026632e-05,
      "loss": 0.0009,
      "step": 2402
    },
    {
      "epoch": 0.7032484635645303,
      "grad_norm": 0.0061316415667533875,
      "learning_rate": 3.241878841088675e-05,
      "loss": 0.0001,
      "step": 2403
    },
    {
      "epoch": 0.7035411179397132,
      "grad_norm": 0.0035257881972938776,
      "learning_rate": 3.241147205150717e-05,
      "loss": 0.0001,
      "step": 2404
    },
    {
      "epoch": 0.7038337723148961,
      "grad_norm": 0.0012417975813150406,
      "learning_rate": 3.2404155692127597e-05,
      "loss": 0.0,
      "step": 2405
    },
    {
      "epoch": 0.704126426690079,
      "grad_norm": 0.006698724813759327,
      "learning_rate": 3.2396839332748024e-05,
      "loss": 0.0002,
      "step": 2406
    },
    {
      "epoch": 0.704419081065262,
      "grad_norm": 0.005079933907836676,
      "learning_rate": 3.238952297336845e-05,
      "loss": 0.0001,
      "step": 2407
    },
    {
      "epoch": 0.7047117354404449,
      "grad_norm": 0.005624685902148485,
      "learning_rate": 3.238220661398888e-05,
      "loss": 0.0002,
      "step": 2408
    },
    {
      "epoch": 0.7050043898156277,
      "grad_norm": 3.10202693939209,
      "learning_rate": 3.237489025460931e-05,
      "loss": 0.0255,
      "step": 2409
    },
    {
      "epoch": 0.7052970441908106,
      "grad_norm": 0.003435496473684907,
      "learning_rate": 3.2367573895229736e-05,
      "loss": 0.0001,
      "step": 2410
    },
    {
      "epoch": 0.7055896985659935,
      "grad_norm": 0.005849195644259453,
      "learning_rate": 3.2360257535850164e-05,
      "loss": 0.0001,
      "step": 2411
    },
    {
      "epoch": 0.7058823529411765,
      "grad_norm": 0.0022957089822739363,
      "learning_rate": 3.235294117647059e-05,
      "loss": 0.0,
      "step": 2412
    },
    {
      "epoch": 0.7061750073163594,
      "grad_norm": 0.0034467079676687717,
      "learning_rate": 3.234562481709102e-05,
      "loss": 0.0001,
      "step": 2413
    },
    {
      "epoch": 0.7064676616915423,
      "grad_norm": 0.0022936102468520403,
      "learning_rate": 3.233830845771145e-05,
      "loss": 0.0001,
      "step": 2414
    },
    {
      "epoch": 0.7067603160667252,
      "grad_norm": 0.04932126775383949,
      "learning_rate": 3.233099209833187e-05,
      "loss": 0.0003,
      "step": 2415
    },
    {
      "epoch": 0.7070529704419081,
      "grad_norm": 0.001915495377033949,
      "learning_rate": 3.23236757389523e-05,
      "loss": 0.0001,
      "step": 2416
    },
    {
      "epoch": 0.7073456248170911,
      "grad_norm": 0.13249242305755615,
      "learning_rate": 3.2316359379572725e-05,
      "loss": 0.0006,
      "step": 2417
    },
    {
      "epoch": 0.707638279192274,
      "grad_norm": 0.0058246999979019165,
      "learning_rate": 3.230904302019315e-05,
      "loss": 0.0001,
      "step": 2418
    },
    {
      "epoch": 0.7079309335674568,
      "grad_norm": 7.282301902770996,
      "learning_rate": 3.230172666081358e-05,
      "loss": 0.0545,
      "step": 2419
    },
    {
      "epoch": 0.7082235879426397,
      "grad_norm": 0.04346953332424164,
      "learning_rate": 3.229441030143401e-05,
      "loss": 0.0002,
      "step": 2420
    },
    {
      "epoch": 0.7085162423178226,
      "grad_norm": 0.44268798828125,
      "learning_rate": 3.228709394205444e-05,
      "loss": 0.0019,
      "step": 2421
    },
    {
      "epoch": 0.7088088966930055,
      "grad_norm": 0.0017859560903161764,
      "learning_rate": 3.2279777582674865e-05,
      "loss": 0.0,
      "step": 2422
    },
    {
      "epoch": 0.7091015510681885,
      "grad_norm": 0.022358566522598267,
      "learning_rate": 3.227246122329529e-05,
      "loss": 0.0002,
      "step": 2423
    },
    {
      "epoch": 0.7093942054433714,
      "grad_norm": 0.005604309495538473,
      "learning_rate": 3.226514486391572e-05,
      "loss": 0.0001,
      "step": 2424
    },
    {
      "epoch": 0.7096868598185543,
      "grad_norm": 0.011078868061304092,
      "learning_rate": 3.225782850453614e-05,
      "loss": 0.0001,
      "step": 2425
    },
    {
      "epoch": 0.7099795141937372,
      "grad_norm": 0.10173115134239197,
      "learning_rate": 3.225051214515657e-05,
      "loss": 0.0004,
      "step": 2426
    },
    {
      "epoch": 0.7102721685689201,
      "grad_norm": 0.001832267502322793,
      "learning_rate": 3.2243195785777e-05,
      "loss": 0.0,
      "step": 2427
    },
    {
      "epoch": 0.7105648229441031,
      "grad_norm": 0.0018338344525545835,
      "learning_rate": 3.2235879426397426e-05,
      "loss": 0.0,
      "step": 2428
    },
    {
      "epoch": 0.710857477319286,
      "grad_norm": 0.004381049424409866,
      "learning_rate": 3.2228563067017854e-05,
      "loss": 0.0001,
      "step": 2429
    },
    {
      "epoch": 0.7111501316944688,
      "grad_norm": 6.8078293800354,
      "learning_rate": 3.222124670763828e-05,
      "loss": 0.0201,
      "step": 2430
    },
    {
      "epoch": 0.7114427860696517,
      "grad_norm": 0.0014487294247373939,
      "learning_rate": 3.221393034825871e-05,
      "loss": 0.0,
      "step": 2431
    },
    {
      "epoch": 0.7117354404448346,
      "grad_norm": 0.001356608932837844,
      "learning_rate": 3.220661398887914e-05,
      "loss": 0.0,
      "step": 2432
    },
    {
      "epoch": 0.7120280948200176,
      "grad_norm": 0.0017179345013573766,
      "learning_rate": 3.2199297629499566e-05,
      "loss": 0.0,
      "step": 2433
    },
    {
      "epoch": 0.7123207491952005,
      "grad_norm": 0.0007872664136812091,
      "learning_rate": 3.2191981270119994e-05,
      "loss": 0.0,
      "step": 2434
    },
    {
      "epoch": 0.7126134035703834,
      "grad_norm": 0.004386863671243191,
      "learning_rate": 3.218466491074042e-05,
      "loss": 0.0001,
      "step": 2435
    },
    {
      "epoch": 0.7129060579455663,
      "grad_norm": 0.0011565539753064513,
      "learning_rate": 3.217734855136084e-05,
      "loss": 0.0,
      "step": 2436
    },
    {
      "epoch": 0.7131987123207492,
      "grad_norm": 0.0011458956869319081,
      "learning_rate": 3.217003219198127e-05,
      "loss": 0.0,
      "step": 2437
    },
    {
      "epoch": 0.713491366695932,
      "grad_norm": 0.0006992155686020851,
      "learning_rate": 3.21627158326017e-05,
      "loss": 0.0,
      "step": 2438
    },
    {
      "epoch": 0.713784021071115,
      "grad_norm": 0.0013631065376102924,
      "learning_rate": 3.2155399473222127e-05,
      "loss": 0.0,
      "step": 2439
    },
    {
      "epoch": 0.7140766754462979,
      "grad_norm": 0.0007857891614548862,
      "learning_rate": 3.2148083113842554e-05,
      "loss": 0.0,
      "step": 2440
    },
    {
      "epoch": 0.7143693298214808,
      "grad_norm": 0.04865026846528053,
      "learning_rate": 3.214076675446298e-05,
      "loss": 0.0003,
      "step": 2441
    },
    {
      "epoch": 0.7146619841966637,
      "grad_norm": 6.565481185913086,
      "learning_rate": 3.213345039508341e-05,
      "loss": 0.2418,
      "step": 2442
    },
    {
      "epoch": 0.7149546385718466,
      "grad_norm": 0.0007474518497474492,
      "learning_rate": 3.212613403570384e-05,
      "loss": 0.0,
      "step": 2443
    },
    {
      "epoch": 0.7152472929470296,
      "grad_norm": 0.0006861622678115964,
      "learning_rate": 3.2118817676324266e-05,
      "loss": 0.0,
      "step": 2444
    },
    {
      "epoch": 0.7155399473222125,
      "grad_norm": 0.017203431576490402,
      "learning_rate": 3.2111501316944694e-05,
      "loss": 0.0002,
      "step": 2445
    },
    {
      "epoch": 0.7158326016973954,
      "grad_norm": 0.0011003983672708273,
      "learning_rate": 3.210418495756512e-05,
      "loss": 0.0,
      "step": 2446
    },
    {
      "epoch": 0.7161252560725783,
      "grad_norm": 0.00291343592107296,
      "learning_rate": 3.209686859818554e-05,
      "loss": 0.0001,
      "step": 2447
    },
    {
      "epoch": 0.7164179104477612,
      "grad_norm": 0.0013620432000607252,
      "learning_rate": 3.208955223880597e-05,
      "loss": 0.0,
      "step": 2448
    },
    {
      "epoch": 0.7167105648229442,
      "grad_norm": 0.4714560806751251,
      "learning_rate": 3.20822358794264e-05,
      "loss": 0.0012,
      "step": 2449
    },
    {
      "epoch": 0.717003219198127,
      "grad_norm": 0.018954362720251083,
      "learning_rate": 3.207491952004683e-05,
      "loss": 0.0001,
      "step": 2450
    },
    {
      "epoch": 0.7172958735733099,
      "grad_norm": 0.001146254246123135,
      "learning_rate": 3.2067603160667255e-05,
      "loss": 0.0,
      "step": 2451
    },
    {
      "epoch": 0.7175885279484928,
      "grad_norm": 0.001986933872103691,
      "learning_rate": 3.206028680128768e-05,
      "loss": 0.0,
      "step": 2452
    },
    {
      "epoch": 0.7178811823236757,
      "grad_norm": 0.002456973074004054,
      "learning_rate": 3.205297044190811e-05,
      "loss": 0.0001,
      "step": 2453
    },
    {
      "epoch": 0.7181738366988587,
      "grad_norm": 0.0031812170054763556,
      "learning_rate": 3.204565408252854e-05,
      "loss": 0.0001,
      "step": 2454
    },
    {
      "epoch": 0.7184664910740416,
      "grad_norm": 0.0009945594938471913,
      "learning_rate": 3.203833772314897e-05,
      "loss": 0.0,
      "step": 2455
    },
    {
      "epoch": 0.7187591454492245,
      "grad_norm": 0.0007926966063678265,
      "learning_rate": 3.2031021363769395e-05,
      "loss": 0.0,
      "step": 2456
    },
    {
      "epoch": 0.7190517998244074,
      "grad_norm": 0.003633065614849329,
      "learning_rate": 3.2023705004389816e-05,
      "loss": 0.0001,
      "step": 2457
    },
    {
      "epoch": 0.7193444541995903,
      "grad_norm": 0.0019254813669249415,
      "learning_rate": 3.2016388645010244e-05,
      "loss": 0.0,
      "step": 2458
    },
    {
      "epoch": 0.7196371085747731,
      "grad_norm": 0.0014255027053877711,
      "learning_rate": 3.200907228563067e-05,
      "loss": 0.0,
      "step": 2459
    },
    {
      "epoch": 0.7199297629499561,
      "grad_norm": 0.0015393865760415792,
      "learning_rate": 3.20017559262511e-05,
      "loss": 0.0,
      "step": 2460
    },
    {
      "epoch": 0.720222417325139,
      "grad_norm": 0.0018572082044556737,
      "learning_rate": 3.199443956687153e-05,
      "loss": 0.0,
      "step": 2461
    },
    {
      "epoch": 0.7205150717003219,
      "grad_norm": 5.579071998596191,
      "learning_rate": 3.1987123207491956e-05,
      "loss": 0.0084,
      "step": 2462
    },
    {
      "epoch": 0.7208077260755048,
      "grad_norm": 0.0015191318234428763,
      "learning_rate": 3.1979806848112384e-05,
      "loss": 0.0,
      "step": 2463
    },
    {
      "epoch": 0.7211003804506877,
      "grad_norm": 0.0009283102699555457,
      "learning_rate": 3.197249048873281e-05,
      "loss": 0.0,
      "step": 2464
    },
    {
      "epoch": 0.7213930348258707,
      "grad_norm": 0.0024321016389876604,
      "learning_rate": 3.196517412935324e-05,
      "loss": 0.0,
      "step": 2465
    },
    {
      "epoch": 0.7216856892010536,
      "grad_norm": 0.005111926235258579,
      "learning_rate": 3.195785776997367e-05,
      "loss": 0.0001,
      "step": 2466
    },
    {
      "epoch": 0.7219783435762365,
      "grad_norm": 0.000839318847283721,
      "learning_rate": 3.1950541410594096e-05,
      "loss": 0.0,
      "step": 2467
    },
    {
      "epoch": 0.7222709979514194,
      "grad_norm": 0.0005801770021207631,
      "learning_rate": 3.194322505121452e-05,
      "loss": 0.0,
      "step": 2468
    },
    {
      "epoch": 0.7225636523266022,
      "grad_norm": 0.0018791498150676489,
      "learning_rate": 3.1935908691834945e-05,
      "loss": 0.0001,
      "step": 2469
    },
    {
      "epoch": 0.7228563067017852,
      "grad_norm": 0.00031380855944007635,
      "learning_rate": 3.192859233245537e-05,
      "loss": 0.0,
      "step": 2470
    },
    {
      "epoch": 0.7231489610769681,
      "grad_norm": 9.160697937011719,
      "learning_rate": 3.19212759730758e-05,
      "loss": 0.1265,
      "step": 2471
    },
    {
      "epoch": 0.723441615452151,
      "grad_norm": 0.000885524321347475,
      "learning_rate": 3.191395961369623e-05,
      "loss": 0.0,
      "step": 2472
    },
    {
      "epoch": 0.7237342698273339,
      "grad_norm": 0.0005429118173196912,
      "learning_rate": 3.1906643254316656e-05,
      "loss": 0.0,
      "step": 2473
    },
    {
      "epoch": 0.7240269242025168,
      "grad_norm": 0.0030722729861736298,
      "learning_rate": 3.1899326894937084e-05,
      "loss": 0.0,
      "step": 2474
    },
    {
      "epoch": 0.7243195785776997,
      "grad_norm": 0.5611380934715271,
      "learning_rate": 3.189201053555751e-05,
      "loss": 0.0011,
      "step": 2475
    },
    {
      "epoch": 0.7246122329528827,
      "grad_norm": 0.0014798748306930065,
      "learning_rate": 3.188469417617794e-05,
      "loss": 0.0,
      "step": 2476
    },
    {
      "epoch": 0.7249048873280656,
      "grad_norm": 0.0015373347559943795,
      "learning_rate": 3.187737781679837e-05,
      "loss": 0.0,
      "step": 2477
    },
    {
      "epoch": 0.7251975417032485,
      "grad_norm": 0.007843293249607086,
      "learning_rate": 3.187006145741879e-05,
      "loss": 0.0001,
      "step": 2478
    },
    {
      "epoch": 0.7254901960784313,
      "grad_norm": 0.06513158231973648,
      "learning_rate": 3.186274509803922e-05,
      "loss": 0.0006,
      "step": 2479
    },
    {
      "epoch": 0.7257828504536142,
      "grad_norm": 0.0035666245967149734,
      "learning_rate": 3.1855428738659645e-05,
      "loss": 0.0001,
      "step": 2480
    },
    {
      "epoch": 0.7260755048287972,
      "grad_norm": 0.0018813954666256905,
      "learning_rate": 3.184811237928007e-05,
      "loss": 0.0001,
      "step": 2481
    },
    {
      "epoch": 0.7263681592039801,
      "grad_norm": 0.0012866331962868571,
      "learning_rate": 3.18407960199005e-05,
      "loss": 0.0,
      "step": 2482
    },
    {
      "epoch": 0.726660813579163,
      "grad_norm": 0.002225383883342147,
      "learning_rate": 3.183347966052093e-05,
      "loss": 0.0,
      "step": 2483
    },
    {
      "epoch": 0.7269534679543459,
      "grad_norm": 0.006104538217186928,
      "learning_rate": 3.182616330114136e-05,
      "loss": 0.0001,
      "step": 2484
    },
    {
      "epoch": 0.7272461223295288,
      "grad_norm": 2.0093445777893066,
      "learning_rate": 3.1818846941761785e-05,
      "loss": 0.0038,
      "step": 2485
    },
    {
      "epoch": 0.7275387767047118,
      "grad_norm": 0.0065629081800580025,
      "learning_rate": 3.1811530582382206e-05,
      "loss": 0.0001,
      "step": 2486
    },
    {
      "epoch": 0.7278314310798947,
      "grad_norm": 4.3598103523254395,
      "learning_rate": 3.1804214223002634e-05,
      "loss": 0.0149,
      "step": 2487
    },
    {
      "epoch": 0.7281240854550776,
      "grad_norm": 0.004508009646087885,
      "learning_rate": 3.179689786362306e-05,
      "loss": 0.0001,
      "step": 2488
    },
    {
      "epoch": 0.7284167398302605,
      "grad_norm": 0.00616801343858242,
      "learning_rate": 3.178958150424349e-05,
      "loss": 0.0001,
      "step": 2489
    },
    {
      "epoch": 0.7287093942054433,
      "grad_norm": 1.723605751991272,
      "learning_rate": 3.178226514486392e-05,
      "loss": 0.0039,
      "step": 2490
    },
    {
      "epoch": 0.7290020485806262,
      "grad_norm": 0.0013585267588496208,
      "learning_rate": 3.1774948785484346e-05,
      "loss": 0.0,
      "step": 2491
    },
    {
      "epoch": 0.7292947029558092,
      "grad_norm": 0.004794291220605373,
      "learning_rate": 3.1767632426104774e-05,
      "loss": 0.0001,
      "step": 2492
    },
    {
      "epoch": 0.7295873573309921,
      "grad_norm": 0.003616244299337268,
      "learning_rate": 3.17603160667252e-05,
      "loss": 0.0001,
      "step": 2493
    },
    {
      "epoch": 0.729880011706175,
      "grad_norm": 0.004236791282892227,
      "learning_rate": 3.175299970734562e-05,
      "loss": 0.0001,
      "step": 2494
    },
    {
      "epoch": 0.7301726660813579,
      "grad_norm": 0.0011380615178495646,
      "learning_rate": 3.174568334796605e-05,
      "loss": 0.0,
      "step": 2495
    },
    {
      "epoch": 0.7304653204565408,
      "grad_norm": 0.0005486432346515357,
      "learning_rate": 3.173836698858648e-05,
      "loss": 0.0,
      "step": 2496
    },
    {
      "epoch": 0.7307579748317238,
      "grad_norm": 0.0009223229717463255,
      "learning_rate": 3.173105062920691e-05,
      "loss": 0.0,
      "step": 2497
    },
    {
      "epoch": 0.7310506292069067,
      "grad_norm": 0.058350205421447754,
      "learning_rate": 3.1723734269827335e-05,
      "loss": 0.0002,
      "step": 2498
    },
    {
      "epoch": 0.7313432835820896,
      "grad_norm": 0.0006941952742636204,
      "learning_rate": 3.171641791044776e-05,
      "loss": 0.0,
      "step": 2499
    },
    {
      "epoch": 0.7316359379572724,
      "grad_norm": 0.004078224301338196,
      "learning_rate": 3.170910155106819e-05,
      "loss": 0.0001,
      "step": 2500
    },
    {
      "epoch": 0.7319285923324553,
      "grad_norm": 0.014169774949550629,
      "learning_rate": 3.170178519168862e-05,
      "loss": 0.0002,
      "step": 2501
    },
    {
      "epoch": 0.7322212467076383,
      "grad_norm": 0.004702478647232056,
      "learning_rate": 3.169446883230904e-05,
      "loss": 0.0,
      "step": 2502
    },
    {
      "epoch": 0.7325139010828212,
      "grad_norm": 0.00040812077349983156,
      "learning_rate": 3.168715247292947e-05,
      "loss": 0.0,
      "step": 2503
    },
    {
      "epoch": 0.7328065554580041,
      "grad_norm": 0.016532549634575844,
      "learning_rate": 3.1679836113549896e-05,
      "loss": 0.0001,
      "step": 2504
    },
    {
      "epoch": 0.733099209833187,
      "grad_norm": 0.0008503179415129125,
      "learning_rate": 3.1672519754170324e-05,
      "loss": 0.0,
      "step": 2505
    },
    {
      "epoch": 0.7333918642083699,
      "grad_norm": 0.024454286321997643,
      "learning_rate": 3.166520339479075e-05,
      "loss": 0.0001,
      "step": 2506
    },
    {
      "epoch": 0.7336845185835528,
      "grad_norm": 10.554803848266602,
      "learning_rate": 3.165788703541118e-05,
      "loss": 0.1,
      "step": 2507
    },
    {
      "epoch": 0.7339771729587358,
      "grad_norm": 0.0017941119149327278,
      "learning_rate": 3.165057067603161e-05,
      "loss": 0.0,
      "step": 2508
    },
    {
      "epoch": 0.7342698273339187,
      "grad_norm": 0.001540915691293776,
      "learning_rate": 3.1643254316652036e-05,
      "loss": 0.0,
      "step": 2509
    },
    {
      "epoch": 0.7345624817091015,
      "grad_norm": 0.0006537814042530954,
      "learning_rate": 3.163593795727246e-05,
      "loss": 0.0,
      "step": 2510
    },
    {
      "epoch": 0.7348551360842844,
      "grad_norm": 0.0022911422420293093,
      "learning_rate": 3.1628621597892885e-05,
      "loss": 0.0,
      "step": 2511
    },
    {
      "epoch": 0.7351477904594673,
      "grad_norm": 0.0009376522502861917,
      "learning_rate": 3.162130523851331e-05,
      "loss": 0.0,
      "step": 2512
    },
    {
      "epoch": 0.7354404448346503,
      "grad_norm": 0.004998430144041777,
      "learning_rate": 3.161398887913374e-05,
      "loss": 0.0001,
      "step": 2513
    },
    {
      "epoch": 0.7357330992098332,
      "grad_norm": 0.0005815597833134234,
      "learning_rate": 3.160667251975417e-05,
      "loss": 0.0,
      "step": 2514
    },
    {
      "epoch": 0.7360257535850161,
      "grad_norm": 0.006281370297074318,
      "learning_rate": 3.1599356160374597e-05,
      "loss": 0.0001,
      "step": 2515
    },
    {
      "epoch": 0.736318407960199,
      "grad_norm": 0.0013787158532068133,
      "learning_rate": 3.1592039800995024e-05,
      "loss": 0.0,
      "step": 2516
    },
    {
      "epoch": 0.7366110623353819,
      "grad_norm": 0.0031138560734689236,
      "learning_rate": 3.158472344161545e-05,
      "loss": 0.0,
      "step": 2517
    },
    {
      "epoch": 0.7369037167105649,
      "grad_norm": 5.179323673248291,
      "learning_rate": 3.157740708223588e-05,
      "loss": 0.1861,
      "step": 2518
    },
    {
      "epoch": 0.7371963710857478,
      "grad_norm": 0.0007730225916020572,
      "learning_rate": 3.157009072285631e-05,
      "loss": 0.0,
      "step": 2519
    },
    {
      "epoch": 0.7374890254609306,
      "grad_norm": 0.0030811233446002007,
      "learning_rate": 3.1562774363476736e-05,
      "loss": 0.0,
      "step": 2520
    },
    {
      "epoch": 0.7377816798361135,
      "grad_norm": 7.347812652587891,
      "learning_rate": 3.155545800409716e-05,
      "loss": 0.0458,
      "step": 2521
    },
    {
      "epoch": 0.7380743342112964,
      "grad_norm": 0.001976597122848034,
      "learning_rate": 3.1548141644717585e-05,
      "loss": 0.0001,
      "step": 2522
    },
    {
      "epoch": 0.7383669885864794,
      "grad_norm": 0.0035360397305339575,
      "learning_rate": 3.154082528533801e-05,
      "loss": 0.0001,
      "step": 2523
    },
    {
      "epoch": 0.7386596429616623,
      "grad_norm": 0.028700485825538635,
      "learning_rate": 3.153350892595844e-05,
      "loss": 0.0002,
      "step": 2524
    },
    {
      "epoch": 0.7389522973368452,
      "grad_norm": 0.04599932208657265,
      "learning_rate": 3.152619256657887e-05,
      "loss": 0.0003,
      "step": 2525
    },
    {
      "epoch": 0.7392449517120281,
      "grad_norm": 0.020827241241931915,
      "learning_rate": 3.15188762071993e-05,
      "loss": 0.0002,
      "step": 2526
    },
    {
      "epoch": 0.739537606087211,
      "grad_norm": 0.004416859708726406,
      "learning_rate": 3.1511559847819725e-05,
      "loss": 0.0,
      "step": 2527
    },
    {
      "epoch": 0.7398302604623939,
      "grad_norm": 0.00646563433110714,
      "learning_rate": 3.150424348844015e-05,
      "loss": 0.0001,
      "step": 2528
    },
    {
      "epoch": 0.7401229148375769,
      "grad_norm": 0.0018413313664495945,
      "learning_rate": 3.149692712906058e-05,
      "loss": 0.0,
      "step": 2529
    },
    {
      "epoch": 0.7404155692127597,
      "grad_norm": 0.002748123137280345,
      "learning_rate": 3.148961076968101e-05,
      "loss": 0.0001,
      "step": 2530
    },
    {
      "epoch": 0.7407082235879426,
      "grad_norm": 0.010115600191056728,
      "learning_rate": 3.148229441030143e-05,
      "loss": 0.0001,
      "step": 2531
    },
    {
      "epoch": 0.7410008779631255,
      "grad_norm": 0.05594480037689209,
      "learning_rate": 3.147497805092186e-05,
      "loss": 0.0003,
      "step": 2532
    },
    {
      "epoch": 0.7412935323383084,
      "grad_norm": 0.000752597872633487,
      "learning_rate": 3.1467661691542286e-05,
      "loss": 0.0,
      "step": 2533
    },
    {
      "epoch": 0.7415861867134914,
      "grad_norm": 0.0009483325993642211,
      "learning_rate": 3.1460345332162714e-05,
      "loss": 0.0,
      "step": 2534
    },
    {
      "epoch": 0.7418788410886743,
      "grad_norm": 0.001707609393633902,
      "learning_rate": 3.145302897278314e-05,
      "loss": 0.0,
      "step": 2535
    },
    {
      "epoch": 0.7421714954638572,
      "grad_norm": 0.012230713851749897,
      "learning_rate": 3.144571261340357e-05,
      "loss": 0.0001,
      "step": 2536
    },
    {
      "epoch": 0.7424641498390401,
      "grad_norm": 0.0016634443309158087,
      "learning_rate": 3.1438396254024e-05,
      "loss": 0.0,
      "step": 2537
    },
    {
      "epoch": 0.742756804214223,
      "grad_norm": 0.008607159368693829,
      "learning_rate": 3.1431079894644426e-05,
      "loss": 0.0002,
      "step": 2538
    },
    {
      "epoch": 0.743049458589406,
      "grad_norm": 0.01903282105922699,
      "learning_rate": 3.1423763535264854e-05,
      "loss": 0.0002,
      "step": 2539
    },
    {
      "epoch": 0.7433421129645889,
      "grad_norm": 0.001686413073912263,
      "learning_rate": 3.141644717588528e-05,
      "loss": 0.0,
      "step": 2540
    },
    {
      "epoch": 0.7436347673397717,
      "grad_norm": 0.001426143106073141,
      "learning_rate": 3.140913081650571e-05,
      "loss": 0.0,
      "step": 2541
    },
    {
      "epoch": 0.7439274217149546,
      "grad_norm": 0.002940902952104807,
      "learning_rate": 3.140181445712613e-05,
      "loss": 0.0001,
      "step": 2542
    },
    {
      "epoch": 0.7442200760901375,
      "grad_norm": 0.0014354386366903782,
      "learning_rate": 3.139449809774656e-05,
      "loss": 0.0,
      "step": 2543
    },
    {
      "epoch": 0.7445127304653204,
      "grad_norm": 0.0016779705183580518,
      "learning_rate": 3.138718173836699e-05,
      "loss": 0.0,
      "step": 2544
    },
    {
      "epoch": 0.7448053848405034,
      "grad_norm": 0.0019332582596689463,
      "learning_rate": 3.1379865378987415e-05,
      "loss": 0.0001,
      "step": 2545
    },
    {
      "epoch": 0.7450980392156863,
      "grad_norm": 0.007013517431914806,
      "learning_rate": 3.137254901960784e-05,
      "loss": 0.0001,
      "step": 2546
    },
    {
      "epoch": 0.7453906935908692,
      "grad_norm": 0.001592212007381022,
      "learning_rate": 3.136523266022827e-05,
      "loss": 0.0,
      "step": 2547
    },
    {
      "epoch": 0.7456833479660521,
      "grad_norm": 0.0041719297878444195,
      "learning_rate": 3.13579163008487e-05,
      "loss": 0.0001,
      "step": 2548
    },
    {
      "epoch": 0.745976002341235,
      "grad_norm": 0.0015470386715605855,
      "learning_rate": 3.1350599941469126e-05,
      "loss": 0.0,
      "step": 2549
    },
    {
      "epoch": 0.746268656716418,
      "grad_norm": 0.14043869078159332,
      "learning_rate": 3.1343283582089554e-05,
      "loss": 0.0007,
      "step": 2550
    },
    {
      "epoch": 0.7465613110916008,
      "grad_norm": 0.012669500894844532,
      "learning_rate": 3.133596722270998e-05,
      "loss": 0.0001,
      "step": 2551
    },
    {
      "epoch": 0.7468539654667837,
      "grad_norm": 0.005013457499444485,
      "learning_rate": 3.132865086333041e-05,
      "loss": 0.0001,
      "step": 2552
    },
    {
      "epoch": 0.7471466198419666,
      "grad_norm": 0.00242857588455081,
      "learning_rate": 3.132133450395083e-05,
      "loss": 0.0001,
      "step": 2553
    },
    {
      "epoch": 0.7474392742171495,
      "grad_norm": 0.001480822917073965,
      "learning_rate": 3.131401814457126e-05,
      "loss": 0.0,
      "step": 2554
    },
    {
      "epoch": 0.7477319285923325,
      "grad_norm": 0.005603375378996134,
      "learning_rate": 3.130670178519169e-05,
      "loss": 0.0001,
      "step": 2555
    },
    {
      "epoch": 0.7480245829675154,
      "grad_norm": 0.018584266304969788,
      "learning_rate": 3.1299385425812115e-05,
      "loss": 0.0002,
      "step": 2556
    },
    {
      "epoch": 0.7483172373426983,
      "grad_norm": 0.002079001860693097,
      "learning_rate": 3.129206906643254e-05,
      "loss": 0.0,
      "step": 2557
    },
    {
      "epoch": 0.7486098917178812,
      "grad_norm": 0.018753597512841225,
      "learning_rate": 3.128475270705297e-05,
      "loss": 0.0001,
      "step": 2558
    },
    {
      "epoch": 0.7489025460930641,
      "grad_norm": 0.0020891628228127956,
      "learning_rate": 3.12774363476734e-05,
      "loss": 0.0001,
      "step": 2559
    },
    {
      "epoch": 0.749195200468247,
      "grad_norm": 0.0036165923811495304,
      "learning_rate": 3.127011998829383e-05,
      "loss": 0.0001,
      "step": 2560
    },
    {
      "epoch": 0.74948785484343,
      "grad_norm": 0.0031803473830223083,
      "learning_rate": 3.1262803628914255e-05,
      "loss": 0.0001,
      "step": 2561
    },
    {
      "epoch": 0.7497805092186128,
      "grad_norm": 0.0026807805988937616,
      "learning_rate": 3.125548726953468e-05,
      "loss": 0.0001,
      "step": 2562
    },
    {
      "epoch": 0.7500731635937957,
      "grad_norm": 0.03576747328042984,
      "learning_rate": 3.1248170910155104e-05,
      "loss": 0.0002,
      "step": 2563
    },
    {
      "epoch": 0.7503658179689786,
      "grad_norm": 0.00326459645293653,
      "learning_rate": 3.124085455077553e-05,
      "loss": 0.0001,
      "step": 2564
    },
    {
      "epoch": 0.7506584723441615,
      "grad_norm": 0.0017964920261874795,
      "learning_rate": 3.123353819139596e-05,
      "loss": 0.0,
      "step": 2565
    },
    {
      "epoch": 0.7509511267193445,
      "grad_norm": 0.0020052739419043064,
      "learning_rate": 3.122622183201639e-05,
      "loss": 0.0001,
      "step": 2566
    },
    {
      "epoch": 0.7512437810945274,
      "grad_norm": 0.0005410740268416703,
      "learning_rate": 3.1218905472636816e-05,
      "loss": 0.0,
      "step": 2567
    },
    {
      "epoch": 0.7515364354697103,
      "grad_norm": 0.07815296202898026,
      "learning_rate": 3.1211589113257244e-05,
      "loss": 0.0003,
      "step": 2568
    },
    {
      "epoch": 0.7518290898448932,
      "grad_norm": 0.006977943703532219,
      "learning_rate": 3.120427275387767e-05,
      "loss": 0.0001,
      "step": 2569
    },
    {
      "epoch": 0.752121744220076,
      "grad_norm": 0.011482861824333668,
      "learning_rate": 3.11969563944981e-05,
      "loss": 0.0001,
      "step": 2570
    },
    {
      "epoch": 0.752414398595259,
      "grad_norm": 0.00260129664093256,
      "learning_rate": 3.118964003511853e-05,
      "loss": 0.0,
      "step": 2571
    },
    {
      "epoch": 0.7527070529704419,
      "grad_norm": 0.0028190468437969685,
      "learning_rate": 3.1182323675738956e-05,
      "loss": 0.0001,
      "step": 2572
    },
    {
      "epoch": 0.7529997073456248,
      "grad_norm": 0.006121335085481405,
      "learning_rate": 3.1175007316359384e-05,
      "loss": 0.0001,
      "step": 2573
    },
    {
      "epoch": 0.7532923617208077,
      "grad_norm": 0.0015971810789778829,
      "learning_rate": 3.1167690956979805e-05,
      "loss": 0.0,
      "step": 2574
    },
    {
      "epoch": 0.7535850160959906,
      "grad_norm": 0.016662990674376488,
      "learning_rate": 3.116037459760023e-05,
      "loss": 0.0002,
      "step": 2575
    },
    {
      "epoch": 0.7538776704711735,
      "grad_norm": 0.000863375433254987,
      "learning_rate": 3.115305823822066e-05,
      "loss": 0.0,
      "step": 2576
    },
    {
      "epoch": 0.7541703248463565,
      "grad_norm": 0.0011870183516293764,
      "learning_rate": 3.114574187884109e-05,
      "loss": 0.0,
      "step": 2577
    },
    {
      "epoch": 0.7544629792215394,
      "grad_norm": 0.004520727321505547,
      "learning_rate": 3.113842551946152e-05,
      "loss": 0.0001,
      "step": 2578
    },
    {
      "epoch": 0.7547556335967223,
      "grad_norm": 0.006965481676161289,
      "learning_rate": 3.1131109160081945e-05,
      "loss": 0.0001,
      "step": 2579
    },
    {
      "epoch": 0.7550482879719052,
      "grad_norm": 0.001343530253507197,
      "learning_rate": 3.112379280070237e-05,
      "loss": 0.0,
      "step": 2580
    },
    {
      "epoch": 0.755340942347088,
      "grad_norm": 0.0017632056260481477,
      "learning_rate": 3.11164764413228e-05,
      "loss": 0.0,
      "step": 2581
    },
    {
      "epoch": 0.755633596722271,
      "grad_norm": 0.0017508604796603322,
      "learning_rate": 3.110916008194323e-05,
      "loss": 0.0,
      "step": 2582
    },
    {
      "epoch": 0.7559262510974539,
      "grad_norm": 0.0011078683892264962,
      "learning_rate": 3.1101843722563656e-05,
      "loss": 0.0,
      "step": 2583
    },
    {
      "epoch": 0.7562189054726368,
      "grad_norm": 0.0008804899407550693,
      "learning_rate": 3.109452736318408e-05,
      "loss": 0.0,
      "step": 2584
    },
    {
      "epoch": 0.7565115598478197,
      "grad_norm": 0.07887193560600281,
      "learning_rate": 3.1087211003804506e-05,
      "loss": 0.0004,
      "step": 2585
    },
    {
      "epoch": 0.7568042142230026,
      "grad_norm": 0.005430367775261402,
      "learning_rate": 3.1079894644424934e-05,
      "loss": 0.0001,
      "step": 2586
    },
    {
      "epoch": 0.7570968685981856,
      "grad_norm": 0.0016514130402356386,
      "learning_rate": 3.107257828504536e-05,
      "loss": 0.0,
      "step": 2587
    },
    {
      "epoch": 0.7573895229733685,
      "grad_norm": 0.00218368717469275,
      "learning_rate": 3.106526192566579e-05,
      "loss": 0.0,
      "step": 2588
    },
    {
      "epoch": 0.7576821773485514,
      "grad_norm": 0.0007050017593428493,
      "learning_rate": 3.105794556628622e-05,
      "loss": 0.0,
      "step": 2589
    },
    {
      "epoch": 0.7579748317237343,
      "grad_norm": 0.0005608153296634555,
      "learning_rate": 3.1050629206906645e-05,
      "loss": 0.0,
      "step": 2590
    },
    {
      "epoch": 0.7582674860989171,
      "grad_norm": 0.000953198061324656,
      "learning_rate": 3.104331284752707e-05,
      "loss": 0.0,
      "step": 2591
    },
    {
      "epoch": 0.7585601404741001,
      "grad_norm": 0.001409333199262619,
      "learning_rate": 3.10359964881475e-05,
      "loss": 0.0,
      "step": 2592
    },
    {
      "epoch": 0.758852794849283,
      "grad_norm": 0.0007091196603141725,
      "learning_rate": 3.102868012876793e-05,
      "loss": 0.0,
      "step": 2593
    },
    {
      "epoch": 0.7591454492244659,
      "grad_norm": 0.0006789021426811814,
      "learning_rate": 3.102136376938836e-05,
      "loss": 0.0,
      "step": 2594
    },
    {
      "epoch": 0.7594381035996488,
      "grad_norm": 0.0012679515639320016,
      "learning_rate": 3.101404741000878e-05,
      "loss": 0.0,
      "step": 2595
    },
    {
      "epoch": 0.7597307579748317,
      "grad_norm": 0.0006374386139214039,
      "learning_rate": 3.1006731050629206e-05,
      "loss": 0.0,
      "step": 2596
    },
    {
      "epoch": 0.7600234123500146,
      "grad_norm": 7.963675498962402,
      "learning_rate": 3.0999414691249634e-05,
      "loss": 0.0368,
      "step": 2597
    },
    {
      "epoch": 0.7603160667251976,
      "grad_norm": 0.000828925461973995,
      "learning_rate": 3.099209833187006e-05,
      "loss": 0.0,
      "step": 2598
    },
    {
      "epoch": 0.7606087211003805,
      "grad_norm": 0.0005160178407095373,
      "learning_rate": 3.098478197249049e-05,
      "loss": 0.0,
      "step": 2599
    },
    {
      "epoch": 0.7609013754755634,
      "grad_norm": 0.0015012379735708237,
      "learning_rate": 3.097746561311092e-05,
      "loss": 0.0,
      "step": 2600
    },
    {
      "epoch": 0.7611940298507462,
      "grad_norm": 0.0016636387445032597,
      "learning_rate": 3.0970149253731346e-05,
      "loss": 0.0,
      "step": 2601
    },
    {
      "epoch": 0.7614866842259291,
      "grad_norm": 0.0010978828649967909,
      "learning_rate": 3.0962832894351774e-05,
      "loss": 0.0,
      "step": 2602
    },
    {
      "epoch": 0.7617793386011121,
      "grad_norm": 0.0026254807598888874,
      "learning_rate": 3.09555165349722e-05,
      "loss": 0.0001,
      "step": 2603
    },
    {
      "epoch": 0.762071992976295,
      "grad_norm": 0.007941869087517262,
      "learning_rate": 3.094820017559263e-05,
      "loss": 0.0001,
      "step": 2604
    },
    {
      "epoch": 0.7623646473514779,
      "grad_norm": 0.1966180056333542,
      "learning_rate": 3.094088381621306e-05,
      "loss": 0.0005,
      "step": 2605
    },
    {
      "epoch": 0.7626573017266608,
      "grad_norm": 0.0033722524531185627,
      "learning_rate": 3.093356745683348e-05,
      "loss": 0.0001,
      "step": 2606
    },
    {
      "epoch": 0.7629499561018437,
      "grad_norm": 0.032422102987766266,
      "learning_rate": 3.092625109745391e-05,
      "loss": 0.0004,
      "step": 2607
    },
    {
      "epoch": 0.7632426104770267,
      "grad_norm": 0.010612317360937595,
      "learning_rate": 3.0918934738074335e-05,
      "loss": 0.0001,
      "step": 2608
    },
    {
      "epoch": 0.7635352648522096,
      "grad_norm": 0.4488622546195984,
      "learning_rate": 3.091161837869476e-05,
      "loss": 0.0019,
      "step": 2609
    },
    {
      "epoch": 0.7638279192273925,
      "grad_norm": 0.0019041658379137516,
      "learning_rate": 3.090430201931519e-05,
      "loss": 0.0,
      "step": 2610
    },
    {
      "epoch": 0.7641205736025753,
      "grad_norm": 0.0013012103736400604,
      "learning_rate": 3.089698565993562e-05,
      "loss": 0.0,
      "step": 2611
    },
    {
      "epoch": 0.7644132279777582,
      "grad_norm": 0.0009472115198150277,
      "learning_rate": 3.088966930055605e-05,
      "loss": 0.0,
      "step": 2612
    },
    {
      "epoch": 0.7647058823529411,
      "grad_norm": 0.0014884189004078507,
      "learning_rate": 3.0882352941176475e-05,
      "loss": 0.0,
      "step": 2613
    },
    {
      "epoch": 0.7649985367281241,
      "grad_norm": 0.017035644501447678,
      "learning_rate": 3.08750365817969e-05,
      "loss": 0.0001,
      "step": 2614
    },
    {
      "epoch": 0.765291191103307,
      "grad_norm": 0.0269364845007658,
      "learning_rate": 3.086772022241733e-05,
      "loss": 0.0002,
      "step": 2615
    },
    {
      "epoch": 0.7655838454784899,
      "grad_norm": 0.011261007748544216,
      "learning_rate": 3.086040386303775e-05,
      "loss": 0.0001,
      "step": 2616
    },
    {
      "epoch": 0.7658764998536728,
      "grad_norm": 0.0022833487018942833,
      "learning_rate": 3.085308750365818e-05,
      "loss": 0.0,
      "step": 2617
    },
    {
      "epoch": 0.7661691542288557,
      "grad_norm": 0.0006077625439502299,
      "learning_rate": 3.084577114427861e-05,
      "loss": 0.0,
      "step": 2618
    },
    {
      "epoch": 0.7664618086040387,
      "grad_norm": 0.0006674678879790008,
      "learning_rate": 3.0838454784899036e-05,
      "loss": 0.0,
      "step": 2619
    },
    {
      "epoch": 0.7667544629792216,
      "grad_norm": 6.559497356414795,
      "learning_rate": 3.0831138425519464e-05,
      "loss": 0.0244,
      "step": 2620
    },
    {
      "epoch": 0.7670471173544045,
      "grad_norm": 0.0005603537429124117,
      "learning_rate": 3.082382206613989e-05,
      "loss": 0.0,
      "step": 2621
    },
    {
      "epoch": 0.7673397717295873,
      "grad_norm": 0.03798175975680351,
      "learning_rate": 3.081650570676032e-05,
      "loss": 0.0002,
      "step": 2622
    },
    {
      "epoch": 0.7676324261047702,
      "grad_norm": 0.000654637289699167,
      "learning_rate": 3.080918934738075e-05,
      "loss": 0.0,
      "step": 2623
    },
    {
      "epoch": 0.7679250804799532,
      "grad_norm": 0.0007277166587300599,
      "learning_rate": 3.0801872988001175e-05,
      "loss": 0.0,
      "step": 2624
    },
    {
      "epoch": 0.7682177348551361,
      "grad_norm": 0.0008071591728366911,
      "learning_rate": 3.07945566286216e-05,
      "loss": 0.0,
      "step": 2625
    },
    {
      "epoch": 0.768510389230319,
      "grad_norm": 0.026575203984975815,
      "learning_rate": 3.078724026924203e-05,
      "loss": 0.0001,
      "step": 2626
    },
    {
      "epoch": 0.7688030436055019,
      "grad_norm": 0.0014964027795940638,
      "learning_rate": 3.077992390986245e-05,
      "loss": 0.0,
      "step": 2627
    },
    {
      "epoch": 0.7690956979806848,
      "grad_norm": 0.0101132458075881,
      "learning_rate": 3.077260755048288e-05,
      "loss": 0.0001,
      "step": 2628
    },
    {
      "epoch": 0.7693883523558677,
      "grad_norm": 0.007380722090601921,
      "learning_rate": 3.076529119110331e-05,
      "loss": 0.0,
      "step": 2629
    },
    {
      "epoch": 0.7696810067310507,
      "grad_norm": 0.0011052581248804927,
      "learning_rate": 3.0757974831723736e-05,
      "loss": 0.0,
      "step": 2630
    },
    {
      "epoch": 0.7699736611062336,
      "grad_norm": 24.553504943847656,
      "learning_rate": 3.0750658472344164e-05,
      "loss": 0.037,
      "step": 2631
    },
    {
      "epoch": 0.7702663154814164,
      "grad_norm": 0.040060315281152725,
      "learning_rate": 3.074334211296459e-05,
      "loss": 0.0002,
      "step": 2632
    },
    {
      "epoch": 0.7705589698565993,
      "grad_norm": 0.0004694352683145553,
      "learning_rate": 3.073602575358502e-05,
      "loss": 0.0,
      "step": 2633
    },
    {
      "epoch": 0.7708516242317822,
      "grad_norm": 18.785173416137695,
      "learning_rate": 3.072870939420545e-05,
      "loss": 0.1991,
      "step": 2634
    },
    {
      "epoch": 0.7711442786069652,
      "grad_norm": 0.0014431950403377414,
      "learning_rate": 3.0721393034825876e-05,
      "loss": 0.0,
      "step": 2635
    },
    {
      "epoch": 0.7714369329821481,
      "grad_norm": 8.37049388885498,
      "learning_rate": 3.0714076675446304e-05,
      "loss": 0.1484,
      "step": 2636
    },
    {
      "epoch": 0.771729587357331,
      "grad_norm": 0.001613378757610917,
      "learning_rate": 3.070676031606673e-05,
      "loss": 0.0,
      "step": 2637
    },
    {
      "epoch": 0.7720222417325139,
      "grad_norm": 6.582139015197754,
      "learning_rate": 3.069944395668715e-05,
      "loss": 0.0371,
      "step": 2638
    },
    {
      "epoch": 0.7723148961076968,
      "grad_norm": 0.51373291015625,
      "learning_rate": 3.069212759730758e-05,
      "loss": 0.0014,
      "step": 2639
    },
    {
      "epoch": 0.7726075504828798,
      "grad_norm": 18.354637145996094,
      "learning_rate": 3.068481123792801e-05,
      "loss": 0.1058,
      "step": 2640
    },
    {
      "epoch": 0.7729002048580627,
      "grad_norm": 0.016380123794078827,
      "learning_rate": 3.067749487854844e-05,
      "loss": 0.0001,
      "step": 2641
    },
    {
      "epoch": 0.7731928592332455,
      "grad_norm": 0.001659467234276235,
      "learning_rate": 3.0670178519168865e-05,
      "loss": 0.0,
      "step": 2642
    },
    {
      "epoch": 0.7734855136084284,
      "grad_norm": 0.0006678146310150623,
      "learning_rate": 3.066286215978929e-05,
      "loss": 0.0,
      "step": 2643
    },
    {
      "epoch": 0.7737781679836113,
      "grad_norm": 0.13738636672496796,
      "learning_rate": 3.065554580040972e-05,
      "loss": 0.0005,
      "step": 2644
    },
    {
      "epoch": 0.7740708223587943,
      "grad_norm": 0.0005895581562072039,
      "learning_rate": 3.064822944103015e-05,
      "loss": 0.0,
      "step": 2645
    },
    {
      "epoch": 0.7743634767339772,
      "grad_norm": 0.05153066664934158,
      "learning_rate": 3.064091308165058e-05,
      "loss": 0.0003,
      "step": 2646
    },
    {
      "epoch": 0.7746561311091601,
      "grad_norm": 0.002324148081243038,
      "learning_rate": 3.0633596722271005e-05,
      "loss": 0.0,
      "step": 2647
    },
    {
      "epoch": 0.774948785484343,
      "grad_norm": 0.004383832681924105,
      "learning_rate": 3.0626280362891426e-05,
      "loss": 0.0001,
      "step": 2648
    },
    {
      "epoch": 0.7752414398595259,
      "grad_norm": 0.005408088676631451,
      "learning_rate": 3.0618964003511854e-05,
      "loss": 0.0001,
      "step": 2649
    },
    {
      "epoch": 0.7755340942347088,
      "grad_norm": 0.021713461726903915,
      "learning_rate": 3.061164764413228e-05,
      "loss": 0.0001,
      "step": 2650
    },
    {
      "epoch": 0.7758267486098918,
      "grad_norm": 0.003193659009411931,
      "learning_rate": 3.060433128475271e-05,
      "loss": 0.0001,
      "step": 2651
    },
    {
      "epoch": 0.7761194029850746,
      "grad_norm": 8.086435317993164,
      "learning_rate": 3.059701492537314e-05,
      "loss": 0.1508,
      "step": 2652
    },
    {
      "epoch": 0.7764120573602575,
      "grad_norm": 0.012869983911514282,
      "learning_rate": 3.0589698565993566e-05,
      "loss": 0.0002,
      "step": 2653
    },
    {
      "epoch": 0.7767047117354404,
      "grad_norm": 0.01338435709476471,
      "learning_rate": 3.0582382206613994e-05,
      "loss": 0.0001,
      "step": 2654
    },
    {
      "epoch": 0.7769973661106233,
      "grad_norm": 0.0036994232796132565,
      "learning_rate": 3.057506584723442e-05,
      "loss": 0.0001,
      "step": 2655
    },
    {
      "epoch": 0.7772900204858063,
      "grad_norm": 0.0025011140387505293,
      "learning_rate": 3.056774948785485e-05,
      "loss": 0.0,
      "step": 2656
    },
    {
      "epoch": 0.7775826748609892,
      "grad_norm": 9.787667274475098,
      "learning_rate": 3.056043312847528e-05,
      "loss": 0.1411,
      "step": 2657
    },
    {
      "epoch": 0.7778753292361721,
      "grad_norm": 0.08533408492803574,
      "learning_rate": 3.0553116769095705e-05,
      "loss": 0.0004,
      "step": 2658
    },
    {
      "epoch": 0.778167983611355,
      "grad_norm": 0.024230292066931725,
      "learning_rate": 3.0545800409716126e-05,
      "loss": 0.0004,
      "step": 2659
    },
    {
      "epoch": 0.7784606379865379,
      "grad_norm": 0.007551426533609629,
      "learning_rate": 3.0538484050336554e-05,
      "loss": 0.0001,
      "step": 2660
    },
    {
      "epoch": 0.7787532923617209,
      "grad_norm": 0.005011970642954111,
      "learning_rate": 3.053116769095698e-05,
      "loss": 0.0001,
      "step": 2661
    },
    {
      "epoch": 0.7790459467369037,
      "grad_norm": 0.004218139685690403,
      "learning_rate": 3.052385133157741e-05,
      "loss": 0.0001,
      "step": 2662
    },
    {
      "epoch": 0.7793386011120866,
      "grad_norm": 0.0030332435853779316,
      "learning_rate": 3.0516534972197835e-05,
      "loss": 0.0001,
      "step": 2663
    },
    {
      "epoch": 0.7796312554872695,
      "grad_norm": 0.14285200834274292,
      "learning_rate": 3.0509218612818263e-05,
      "loss": 0.0005,
      "step": 2664
    },
    {
      "epoch": 0.7799239098624524,
      "grad_norm": 6.031563758850098,
      "learning_rate": 3.050190225343869e-05,
      "loss": 0.0279,
      "step": 2665
    },
    {
      "epoch": 0.7802165642376353,
      "grad_norm": 0.005401406437158585,
      "learning_rate": 3.049458589405912e-05,
      "loss": 0.0001,
      "step": 2666
    },
    {
      "epoch": 0.7805092186128183,
      "grad_norm": 0.005855087656527758,
      "learning_rate": 3.0487269534679547e-05,
      "loss": 0.0001,
      "step": 2667
    },
    {
      "epoch": 0.7808018729880012,
      "grad_norm": 0.006042785011231899,
      "learning_rate": 3.0479953175299975e-05,
      "loss": 0.0001,
      "step": 2668
    },
    {
      "epoch": 0.7810945273631841,
      "grad_norm": 0.0059804487973451614,
      "learning_rate": 3.0472636815920396e-05,
      "loss": 0.0001,
      "step": 2669
    },
    {
      "epoch": 0.781387181738367,
      "grad_norm": 0.01024722307920456,
      "learning_rate": 3.0465320456540824e-05,
      "loss": 0.0001,
      "step": 2670
    },
    {
      "epoch": 0.7816798361135499,
      "grad_norm": 15.585776329040527,
      "learning_rate": 3.0458004097161252e-05,
      "loss": 0.2139,
      "step": 2671
    },
    {
      "epoch": 0.7819724904887329,
      "grad_norm": 0.005884439684450626,
      "learning_rate": 3.045068773778168e-05,
      "loss": 0.0001,
      "step": 2672
    },
    {
      "epoch": 0.7822651448639157,
      "grad_norm": 0.016200028359889984,
      "learning_rate": 3.0443371378402108e-05,
      "loss": 0.0002,
      "step": 2673
    },
    {
      "epoch": 0.7825577992390986,
      "grad_norm": 0.008441347628831863,
      "learning_rate": 3.0436055019022536e-05,
      "loss": 0.0001,
      "step": 2674
    },
    {
      "epoch": 0.7828504536142815,
      "grad_norm": 0.6087526082992554,
      "learning_rate": 3.0428738659642964e-05,
      "loss": 0.0025,
      "step": 2675
    },
    {
      "epoch": 0.7831431079894644,
      "grad_norm": 0.013706745579838753,
      "learning_rate": 3.042142230026339e-05,
      "loss": 0.0001,
      "step": 2676
    },
    {
      "epoch": 0.7834357623646474,
      "grad_norm": 0.012047035619616508,
      "learning_rate": 3.041410594088382e-05,
      "loss": 0.0002,
      "step": 2677
    },
    {
      "epoch": 0.7837284167398303,
      "grad_norm": 0.0010208478197455406,
      "learning_rate": 3.0406789581504247e-05,
      "loss": 0.0,
      "step": 2678
    },
    {
      "epoch": 0.7840210711150132,
      "grad_norm": 0.008369138464331627,
      "learning_rate": 3.0399473222124675e-05,
      "loss": 0.0001,
      "step": 2679
    },
    {
      "epoch": 0.7843137254901961,
      "grad_norm": 0.01685381308197975,
      "learning_rate": 3.0392156862745097e-05,
      "loss": 0.0002,
      "step": 2680
    },
    {
      "epoch": 0.784606379865379,
      "grad_norm": 0.004259116016328335,
      "learning_rate": 3.0384840503365524e-05,
      "loss": 0.0001,
      "step": 2681
    },
    {
      "epoch": 0.7848990342405618,
      "grad_norm": 7.433053970336914,
      "learning_rate": 3.0377524143985952e-05,
      "loss": 0.0862,
      "step": 2682
    },
    {
      "epoch": 0.7851916886157448,
      "grad_norm": 0.004358033649623394,
      "learning_rate": 3.037020778460638e-05,
      "loss": 0.0001,
      "step": 2683
    },
    {
      "epoch": 0.7854843429909277,
      "grad_norm": 0.2607213854789734,
      "learning_rate": 3.036289142522681e-05,
      "loss": 0.0012,
      "step": 2684
    },
    {
      "epoch": 0.7857769973661106,
      "grad_norm": 0.1773947924375534,
      "learning_rate": 3.0355575065847236e-05,
      "loss": 0.0011,
      "step": 2685
    },
    {
      "epoch": 0.7860696517412935,
      "grad_norm": 0.0020568312611430883,
      "learning_rate": 3.0348258706467664e-05,
      "loss": 0.0,
      "step": 2686
    },
    {
      "epoch": 0.7863623061164764,
      "grad_norm": 3.2226767539978027,
      "learning_rate": 3.0340942347088092e-05,
      "loss": 0.0175,
      "step": 2687
    },
    {
      "epoch": 0.7866549604916594,
      "grad_norm": 0.8149426579475403,
      "learning_rate": 3.033362598770852e-05,
      "loss": 0.0041,
      "step": 2688
    },
    {
      "epoch": 0.7869476148668423,
      "grad_norm": 0.008766296319663525,
      "learning_rate": 3.0326309628328948e-05,
      "loss": 0.0001,
      "step": 2689
    },
    {
      "epoch": 0.7872402692420252,
      "grad_norm": 0.008891499601304531,
      "learning_rate": 3.0318993268949376e-05,
      "loss": 0.0002,
      "step": 2690
    },
    {
      "epoch": 0.7875329236172081,
      "grad_norm": 0.019587429240345955,
      "learning_rate": 3.0311676909569797e-05,
      "loss": 0.0003,
      "step": 2691
    },
    {
      "epoch": 0.787825577992391,
      "grad_norm": 0.0022967993281781673,
      "learning_rate": 3.0304360550190225e-05,
      "loss": 0.0001,
      "step": 2692
    },
    {
      "epoch": 0.788118232367574,
      "grad_norm": 0.057845134288072586,
      "learning_rate": 3.0297044190810653e-05,
      "loss": 0.0004,
      "step": 2693
    },
    {
      "epoch": 0.7884108867427568,
      "grad_norm": 0.01102588139474392,
      "learning_rate": 3.028972783143108e-05,
      "loss": 0.0002,
      "step": 2694
    },
    {
      "epoch": 0.7887035411179397,
      "grad_norm": 5.191501617431641,
      "learning_rate": 3.028241147205151e-05,
      "loss": 0.1501,
      "step": 2695
    },
    {
      "epoch": 0.7889961954931226,
      "grad_norm": 0.02012191154062748,
      "learning_rate": 3.0275095112671937e-05,
      "loss": 0.0002,
      "step": 2696
    },
    {
      "epoch": 0.7892888498683055,
      "grad_norm": 0.002266214694827795,
      "learning_rate": 3.0267778753292365e-05,
      "loss": 0.0,
      "step": 2697
    },
    {
      "epoch": 0.7895815042434884,
      "grad_norm": 0.017709849402308464,
      "learning_rate": 3.0260462393912793e-05,
      "loss": 0.0002,
      "step": 2698
    },
    {
      "epoch": 0.7898741586186714,
      "grad_norm": 0.08727958798408508,
      "learning_rate": 3.025314603453322e-05,
      "loss": 0.0009,
      "step": 2699
    },
    {
      "epoch": 0.7901668129938543,
      "grad_norm": 0.0019750255160033703,
      "learning_rate": 3.024582967515365e-05,
      "loss": 0.0001,
      "step": 2700
    },
    {
      "epoch": 0.7904594673690372,
      "grad_norm": 0.08402732014656067,
      "learning_rate": 3.023851331577407e-05,
      "loss": 0.0008,
      "step": 2701
    },
    {
      "epoch": 0.79075212174422,
      "grad_norm": 0.008780294097959995,
      "learning_rate": 3.0231196956394498e-05,
      "loss": 0.0001,
      "step": 2702
    },
    {
      "epoch": 0.7910447761194029,
      "grad_norm": 0.0077883535996079445,
      "learning_rate": 3.0223880597014926e-05,
      "loss": 0.0001,
      "step": 2703
    },
    {
      "epoch": 0.7913374304945859,
      "grad_norm": 0.08220571279525757,
      "learning_rate": 3.0216564237635354e-05,
      "loss": 0.0006,
      "step": 2704
    },
    {
      "epoch": 0.7916300848697688,
      "grad_norm": 0.11518245935440063,
      "learning_rate": 3.0209247878255782e-05,
      "loss": 0.0008,
      "step": 2705
    },
    {
      "epoch": 0.7919227392449517,
      "grad_norm": 0.010135181248188019,
      "learning_rate": 3.020193151887621e-05,
      "loss": 0.0002,
      "step": 2706
    },
    {
      "epoch": 0.7922153936201346,
      "grad_norm": 0.0184785146266222,
      "learning_rate": 3.0194615159496638e-05,
      "loss": 0.0002,
      "step": 2707
    },
    {
      "epoch": 0.7925080479953175,
      "grad_norm": 0.02294454351067543,
      "learning_rate": 3.0187298800117066e-05,
      "loss": 0.0002,
      "step": 2708
    },
    {
      "epoch": 0.7928007023705005,
      "grad_norm": 0.014910697937011719,
      "learning_rate": 3.0179982440737494e-05,
      "loss": 0.0002,
      "step": 2709
    },
    {
      "epoch": 0.7930933567456834,
      "grad_norm": 0.0005895071662962437,
      "learning_rate": 3.017266608135792e-05,
      "loss": 0.0,
      "step": 2710
    },
    {
      "epoch": 0.7933860111208663,
      "grad_norm": 0.0028742244467139244,
      "learning_rate": 3.0165349721978346e-05,
      "loss": 0.0001,
      "step": 2711
    },
    {
      "epoch": 0.7936786654960492,
      "grad_norm": 0.019316155463457108,
      "learning_rate": 3.015803336259877e-05,
      "loss": 0.0002,
      "step": 2712
    },
    {
      "epoch": 0.793971319871232,
      "grad_norm": 0.00208294321782887,
      "learning_rate": 3.01507170032192e-05,
      "loss": 0.0,
      "step": 2713
    },
    {
      "epoch": 0.794263974246415,
      "grad_norm": 0.24471159279346466,
      "learning_rate": 3.0143400643839626e-05,
      "loss": 0.0015,
      "step": 2714
    },
    {
      "epoch": 0.7945566286215979,
      "grad_norm": 0.0016943076625466347,
      "learning_rate": 3.0136084284460054e-05,
      "loss": 0.0,
      "step": 2715
    },
    {
      "epoch": 0.7948492829967808,
      "grad_norm": 0.06629418581724167,
      "learning_rate": 3.0128767925080482e-05,
      "loss": 0.0004,
      "step": 2716
    },
    {
      "epoch": 0.7951419373719637,
      "grad_norm": 0.25379154086112976,
      "learning_rate": 3.012145156570091e-05,
      "loss": 0.0017,
      "step": 2717
    },
    {
      "epoch": 0.7954345917471466,
      "grad_norm": 0.00131731026340276,
      "learning_rate": 3.0114135206321338e-05,
      "loss": 0.0,
      "step": 2718
    },
    {
      "epoch": 0.7957272461223295,
      "grad_norm": 0.012910161167383194,
      "learning_rate": 3.0106818846941763e-05,
      "loss": 0.0002,
      "step": 2719
    },
    {
      "epoch": 0.7960199004975125,
      "grad_norm": 0.0022732571233063936,
      "learning_rate": 3.009950248756219e-05,
      "loss": 0.0,
      "step": 2720
    },
    {
      "epoch": 0.7963125548726954,
      "grad_norm": 0.0017511585028842092,
      "learning_rate": 3.009218612818262e-05,
      "loss": 0.0,
      "step": 2721
    },
    {
      "epoch": 0.7966052092478783,
      "grad_norm": 0.0016778865829110146,
      "learning_rate": 3.0084869768803043e-05,
      "loss": 0.0,
      "step": 2722
    },
    {
      "epoch": 0.7968978636230611,
      "grad_norm": 0.0068087331019341946,
      "learning_rate": 3.007755340942347e-05,
      "loss": 0.0001,
      "step": 2723
    },
    {
      "epoch": 0.797190517998244,
      "grad_norm": 0.0014441823586821556,
      "learning_rate": 3.00702370500439e-05,
      "loss": 0.0,
      "step": 2724
    },
    {
      "epoch": 0.797483172373427,
      "grad_norm": 0.0011686611687764525,
      "learning_rate": 3.0062920690664327e-05,
      "loss": 0.0,
      "step": 2725
    },
    {
      "epoch": 0.7977758267486099,
      "grad_norm": 0.8683436512947083,
      "learning_rate": 3.0055604331284755e-05,
      "loss": 0.0021,
      "step": 2726
    },
    {
      "epoch": 0.7980684811237928,
      "grad_norm": 0.02965354174375534,
      "learning_rate": 3.004828797190518e-05,
      "loss": 0.0002,
      "step": 2727
    },
    {
      "epoch": 0.7983611354989757,
      "grad_norm": 0.008388472720980644,
      "learning_rate": 3.0040971612525608e-05,
      "loss": 0.0001,
      "step": 2728
    },
    {
      "epoch": 0.7986537898741586,
      "grad_norm": 0.003930417355149984,
      "learning_rate": 3.0033655253146036e-05,
      "loss": 0.0001,
      "step": 2729
    },
    {
      "epoch": 0.7989464442493416,
      "grad_norm": 0.0007542763487435877,
      "learning_rate": 3.0026338893766464e-05,
      "loss": 0.0,
      "step": 2730
    },
    {
      "epoch": 0.7992390986245245,
      "grad_norm": 0.0011721195187419653,
      "learning_rate": 3.001902253438689e-05,
      "loss": 0.0,
      "step": 2731
    },
    {
      "epoch": 0.7995317529997074,
      "grad_norm": 9.9760103225708,
      "learning_rate": 3.001170617500732e-05,
      "loss": 0.0838,
      "step": 2732
    },
    {
      "epoch": 0.7998244073748902,
      "grad_norm": 0.011321314610540867,
      "learning_rate": 3.0004389815627744e-05,
      "loss": 0.0001,
      "step": 2733
    },
    {
      "epoch": 0.8001170617500731,
      "grad_norm": 0.0004917927435599267,
      "learning_rate": 2.9997073456248172e-05,
      "loss": 0.0,
      "step": 2734
    },
    {
      "epoch": 0.800409716125256,
      "grad_norm": 0.005929878912866116,
      "learning_rate": 2.9989757096868597e-05,
      "loss": 0.0001,
      "step": 2735
    },
    {
      "epoch": 0.800702370500439,
      "grad_norm": 0.002472624648362398,
      "learning_rate": 2.9982440737489024e-05,
      "loss": 0.0,
      "step": 2736
    },
    {
      "epoch": 0.8009950248756219,
      "grad_norm": 0.009232684969902039,
      "learning_rate": 2.9975124378109452e-05,
      "loss": 0.0001,
      "step": 2737
    },
    {
      "epoch": 0.8012876792508048,
      "grad_norm": 0.004552721511572599,
      "learning_rate": 2.996780801872988e-05,
      "loss": 0.0001,
      "step": 2738
    },
    {
      "epoch": 0.8015803336259877,
      "grad_norm": 0.0003590781125240028,
      "learning_rate": 2.9960491659350308e-05,
      "loss": 0.0,
      "step": 2739
    },
    {
      "epoch": 0.8018729880011706,
      "grad_norm": 0.0006961704348213971,
      "learning_rate": 2.9953175299970736e-05,
      "loss": 0.0,
      "step": 2740
    },
    {
      "epoch": 0.8021656423763536,
      "grad_norm": 0.002296611201018095,
      "learning_rate": 2.9945858940591164e-05,
      "loss": 0.0,
      "step": 2741
    },
    {
      "epoch": 0.8024582967515365,
      "grad_norm": 0.0015559152234345675,
      "learning_rate": 2.9938542581211592e-05,
      "loss": 0.0,
      "step": 2742
    },
    {
      "epoch": 0.8027509511267193,
      "grad_norm": 0.0012545986101031303,
      "learning_rate": 2.993122622183202e-05,
      "loss": 0.0,
      "step": 2743
    },
    {
      "epoch": 0.8030436055019022,
      "grad_norm": 0.0018432078650221229,
      "learning_rate": 2.992390986245244e-05,
      "loss": 0.0,
      "step": 2744
    },
    {
      "epoch": 0.8033362598770851,
      "grad_norm": 11.844979286193848,
      "learning_rate": 2.991659350307287e-05,
      "loss": 0.0336,
      "step": 2745
    },
    {
      "epoch": 0.8036289142522681,
      "grad_norm": 0.0009786701994016767,
      "learning_rate": 2.9909277143693297e-05,
      "loss": 0.0,
      "step": 2746
    },
    {
      "epoch": 0.803921568627451,
      "grad_norm": 0.055951718240976334,
      "learning_rate": 2.9901960784313725e-05,
      "loss": 0.0002,
      "step": 2747
    },
    {
      "epoch": 0.8042142230026339,
      "grad_norm": 0.0006754621281288564,
      "learning_rate": 2.9894644424934153e-05,
      "loss": 0.0,
      "step": 2748
    },
    {
      "epoch": 0.8045068773778168,
      "grad_norm": 0.012983710505068302,
      "learning_rate": 2.988732806555458e-05,
      "loss": 0.0001,
      "step": 2749
    },
    {
      "epoch": 0.8047995317529997,
      "grad_norm": 0.0006265717092901468,
      "learning_rate": 2.988001170617501e-05,
      "loss": 0.0,
      "step": 2750
    },
    {
      "epoch": 0.8050921861281826,
      "grad_norm": 0.00151202199049294,
      "learning_rate": 2.9872695346795437e-05,
      "loss": 0.0,
      "step": 2751
    },
    {
      "epoch": 0.8053848405033656,
      "grad_norm": 0.0005175884580239654,
      "learning_rate": 2.9865378987415865e-05,
      "loss": 0.0,
      "step": 2752
    },
    {
      "epoch": 0.8056774948785485,
      "grad_norm": 0.0009235625620931387,
      "learning_rate": 2.9858062628036293e-05,
      "loss": 0.0,
      "step": 2753
    },
    {
      "epoch": 0.8059701492537313,
      "grad_norm": 9.69883918762207,
      "learning_rate": 2.9850746268656714e-05,
      "loss": 0.1382,
      "step": 2754
    },
    {
      "epoch": 0.8062628036289142,
      "grad_norm": 0.0002538673870731145,
      "learning_rate": 2.9843429909277142e-05,
      "loss": 0.0,
      "step": 2755
    },
    {
      "epoch": 0.8065554580040971,
      "grad_norm": 0.20520877838134766,
      "learning_rate": 2.983611354989757e-05,
      "loss": 0.0008,
      "step": 2756
    },
    {
      "epoch": 0.8068481123792801,
      "grad_norm": 0.02855227142572403,
      "learning_rate": 2.9828797190517998e-05,
      "loss": 0.0002,
      "step": 2757
    },
    {
      "epoch": 0.807140766754463,
      "grad_norm": 0.0006890453514643013,
      "learning_rate": 2.9821480831138426e-05,
      "loss": 0.0,
      "step": 2758
    },
    {
      "epoch": 0.8074334211296459,
      "grad_norm": 0.0015851255739107728,
      "learning_rate": 2.9814164471758854e-05,
      "loss": 0.0,
      "step": 2759
    },
    {
      "epoch": 0.8077260755048288,
      "grad_norm": 0.0012363274581730366,
      "learning_rate": 2.9806848112379282e-05,
      "loss": 0.0,
      "step": 2760
    },
    {
      "epoch": 0.8080187298800117,
      "grad_norm": 0.0014085586881265044,
      "learning_rate": 2.979953175299971e-05,
      "loss": 0.0,
      "step": 2761
    },
    {
      "epoch": 0.8083113842551947,
      "grad_norm": 0.0009393296204507351,
      "learning_rate": 2.9792215393620138e-05,
      "loss": 0.0,
      "step": 2762
    },
    {
      "epoch": 0.8086040386303776,
      "grad_norm": 0.00327840237878263,
      "learning_rate": 2.9784899034240566e-05,
      "loss": 0.0,
      "step": 2763
    },
    {
      "epoch": 0.8088966930055604,
      "grad_norm": 0.0021783942356705666,
      "learning_rate": 2.9777582674860994e-05,
      "loss": 0.0,
      "step": 2764
    },
    {
      "epoch": 0.8091893473807433,
      "grad_norm": 0.01263353694230318,
      "learning_rate": 2.9770266315481415e-05,
      "loss": 0.0001,
      "step": 2765
    },
    {
      "epoch": 0.8094820017559262,
      "grad_norm": 0.001956967869773507,
      "learning_rate": 2.9762949956101843e-05,
      "loss": 0.0,
      "step": 2766
    },
    {
      "epoch": 0.8097746561311091,
      "grad_norm": 0.00973919965326786,
      "learning_rate": 2.975563359672227e-05,
      "loss": 0.0001,
      "step": 2767
    },
    {
      "epoch": 0.8100673105062921,
      "grad_norm": 0.2914956510066986,
      "learning_rate": 2.97483172373427e-05,
      "loss": 0.0016,
      "step": 2768
    },
    {
      "epoch": 0.810359964881475,
      "grad_norm": 0.04364306107163429,
      "learning_rate": 2.9741000877963126e-05,
      "loss": 0.0003,
      "step": 2769
    },
    {
      "epoch": 0.8106526192566579,
      "grad_norm": 0.008897513151168823,
      "learning_rate": 2.9733684518583554e-05,
      "loss": 0.0001,
      "step": 2770
    },
    {
      "epoch": 0.8109452736318408,
      "grad_norm": 0.0018379120156168938,
      "learning_rate": 2.9726368159203982e-05,
      "loss": 0.0,
      "step": 2771
    },
    {
      "epoch": 0.8112379280070237,
      "grad_norm": 0.0035767194349318743,
      "learning_rate": 2.971905179982441e-05,
      "loss": 0.0001,
      "step": 2772
    },
    {
      "epoch": 0.8115305823822067,
      "grad_norm": 0.12195797264575958,
      "learning_rate": 2.9711735440444838e-05,
      "loss": 0.0005,
      "step": 2773
    },
    {
      "epoch": 0.8118232367573895,
      "grad_norm": 0.0010022588539868593,
      "learning_rate": 2.9704419081065266e-05,
      "loss": 0.0,
      "step": 2774
    },
    {
      "epoch": 0.8121158911325724,
      "grad_norm": 0.0004416050505824387,
      "learning_rate": 2.9697102721685687e-05,
      "loss": 0.0,
      "step": 2775
    },
    {
      "epoch": 0.8124085455077553,
      "grad_norm": 0.002000578213483095,
      "learning_rate": 2.9689786362306115e-05,
      "loss": 0.0,
      "step": 2776
    },
    {
      "epoch": 0.8127011998829382,
      "grad_norm": 0.0013109841383993626,
      "learning_rate": 2.9682470002926543e-05,
      "loss": 0.0,
      "step": 2777
    },
    {
      "epoch": 0.8129938542581212,
      "grad_norm": 0.002561133122071624,
      "learning_rate": 2.967515364354697e-05,
      "loss": 0.0,
      "step": 2778
    },
    {
      "epoch": 0.8132865086333041,
      "grad_norm": 0.001267809304408729,
      "learning_rate": 2.96678372841674e-05,
      "loss": 0.0,
      "step": 2779
    },
    {
      "epoch": 0.813579163008487,
      "grad_norm": 0.0004984505940228701,
      "learning_rate": 2.9660520924787827e-05,
      "loss": 0.0,
      "step": 2780
    },
    {
      "epoch": 0.8138718173836699,
      "grad_norm": 0.006047519389539957,
      "learning_rate": 2.9653204565408255e-05,
      "loss": 0.0001,
      "step": 2781
    },
    {
      "epoch": 0.8141644717588528,
      "grad_norm": 0.0013361189048737288,
      "learning_rate": 2.9645888206028683e-05,
      "loss": 0.0,
      "step": 2782
    },
    {
      "epoch": 0.8144571261340358,
      "grad_norm": 0.0007503708475269377,
      "learning_rate": 2.963857184664911e-05,
      "loss": 0.0,
      "step": 2783
    },
    {
      "epoch": 0.8147497805092186,
      "grad_norm": 0.05241904780268669,
      "learning_rate": 2.963125548726954e-05,
      "loss": 0.0003,
      "step": 2784
    },
    {
      "epoch": 0.8150424348844015,
      "grad_norm": 0.0008833020692691207,
      "learning_rate": 2.9623939127889967e-05,
      "loss": 0.0,
      "step": 2785
    },
    {
      "epoch": 0.8153350892595844,
      "grad_norm": 0.00027996645076200366,
      "learning_rate": 2.9616622768510388e-05,
      "loss": 0.0,
      "step": 2786
    },
    {
      "epoch": 0.8156277436347673,
      "grad_norm": 0.01899244263768196,
      "learning_rate": 2.9609306409130816e-05,
      "loss": 0.0001,
      "step": 2787
    },
    {
      "epoch": 0.8159203980099502,
      "grad_norm": 0.001283986959606409,
      "learning_rate": 2.9601990049751244e-05,
      "loss": 0.0,
      "step": 2788
    },
    {
      "epoch": 0.8162130523851332,
      "grad_norm": 0.00158262113109231,
      "learning_rate": 2.9594673690371672e-05,
      "loss": 0.0,
      "step": 2789
    },
    {
      "epoch": 0.8165057067603161,
      "grad_norm": 0.0007797530852258205,
      "learning_rate": 2.95873573309921e-05,
      "loss": 0.0,
      "step": 2790
    },
    {
      "epoch": 0.816798361135499,
      "grad_norm": 0.000604621774982661,
      "learning_rate": 2.9580040971612528e-05,
      "loss": 0.0,
      "step": 2791
    },
    {
      "epoch": 0.8170910155106819,
      "grad_norm": 0.37448152899742126,
      "learning_rate": 2.9572724612232956e-05,
      "loss": 0.0012,
      "step": 2792
    },
    {
      "epoch": 0.8173836698858647,
      "grad_norm": 0.0010730307549238205,
      "learning_rate": 2.9565408252853384e-05,
      "loss": 0.0,
      "step": 2793
    },
    {
      "epoch": 0.8176763242610477,
      "grad_norm": 0.0008598492713645101,
      "learning_rate": 2.955809189347381e-05,
      "loss": 0.0,
      "step": 2794
    },
    {
      "epoch": 0.8179689786362306,
      "grad_norm": 0.0010620629182085395,
      "learning_rate": 2.955077553409424e-05,
      "loss": 0.0,
      "step": 2795
    },
    {
      "epoch": 0.8182616330114135,
      "grad_norm": 0.006837981753051281,
      "learning_rate": 2.9543459174714668e-05,
      "loss": 0.0001,
      "step": 2796
    },
    {
      "epoch": 0.8185542873865964,
      "grad_norm": 0.0009813921060413122,
      "learning_rate": 2.953614281533509e-05,
      "loss": 0.0,
      "step": 2797
    },
    {
      "epoch": 0.8188469417617793,
      "grad_norm": 0.024175820872187614,
      "learning_rate": 2.9528826455955517e-05,
      "loss": 0.0001,
      "step": 2798
    },
    {
      "epoch": 0.8191395961369623,
      "grad_norm": 0.011615071445703506,
      "learning_rate": 2.9521510096575945e-05,
      "loss": 0.0001,
      "step": 2799
    },
    {
      "epoch": 0.8194322505121452,
      "grad_norm": 0.0013745144242420793,
      "learning_rate": 2.9514193737196373e-05,
      "loss": 0.0,
      "step": 2800
    },
    {
      "epoch": 0.8197249048873281,
      "grad_norm": 0.0020426204428076744,
      "learning_rate": 2.95068773778168e-05,
      "loss": 0.0,
      "step": 2801
    },
    {
      "epoch": 0.820017559262511,
      "grad_norm": 0.0008682936313562095,
      "learning_rate": 2.949956101843723e-05,
      "loss": 0.0,
      "step": 2802
    },
    {
      "epoch": 0.8203102136376939,
      "grad_norm": 0.0026835596654564142,
      "learning_rate": 2.9492244659057656e-05,
      "loss": 0.0001,
      "step": 2803
    },
    {
      "epoch": 0.8206028680128767,
      "grad_norm": 0.01051302533596754,
      "learning_rate": 2.9484928299678084e-05,
      "loss": 0.0001,
      "step": 2804
    },
    {
      "epoch": 0.8208955223880597,
      "grad_norm": 0.00984960701316595,
      "learning_rate": 2.9477611940298512e-05,
      "loss": 0.0001,
      "step": 2805
    },
    {
      "epoch": 0.8211881767632426,
      "grad_norm": 0.000248409021878615,
      "learning_rate": 2.9470295580918937e-05,
      "loss": 0.0,
      "step": 2806
    },
    {
      "epoch": 0.8214808311384255,
      "grad_norm": 2.868433713912964,
      "learning_rate": 2.946297922153936e-05,
      "loss": 0.0068,
      "step": 2807
    },
    {
      "epoch": 0.8217734855136084,
      "grad_norm": 0.0009482511668466032,
      "learning_rate": 2.945566286215979e-05,
      "loss": 0.0,
      "step": 2808
    },
    {
      "epoch": 0.8220661398887913,
      "grad_norm": 0.0003328518941998482,
      "learning_rate": 2.9448346502780217e-05,
      "loss": 0.0,
      "step": 2809
    },
    {
      "epoch": 0.8223587942639743,
      "grad_norm": 0.008871063590049744,
      "learning_rate": 2.9441030143400645e-05,
      "loss": 0.0,
      "step": 2810
    },
    {
      "epoch": 0.8226514486391572,
      "grad_norm": 0.00042780142393894494,
      "learning_rate": 2.9433713784021073e-05,
      "loss": 0.0,
      "step": 2811
    },
    {
      "epoch": 0.8229441030143401,
      "grad_norm": 0.0029195339884608984,
      "learning_rate": 2.94263974246415e-05,
      "loss": 0.0,
      "step": 2812
    },
    {
      "epoch": 0.823236757389523,
      "grad_norm": 0.0002829222066793591,
      "learning_rate": 2.941908106526193e-05,
      "loss": 0.0,
      "step": 2813
    },
    {
      "epoch": 0.8235294117647058,
      "grad_norm": 0.0001001347482088022,
      "learning_rate": 2.9411764705882354e-05,
      "loss": 0.0,
      "step": 2814
    },
    {
      "epoch": 0.8238220661398888,
      "grad_norm": 0.0007854485884308815,
      "learning_rate": 2.9404448346502782e-05,
      "loss": 0.0,
      "step": 2815
    },
    {
      "epoch": 0.8241147205150717,
      "grad_norm": 2.1147656440734863,
      "learning_rate": 2.939713198712321e-05,
      "loss": 0.0046,
      "step": 2816
    },
    {
      "epoch": 0.8244073748902546,
      "grad_norm": 0.5754238367080688,
      "learning_rate": 2.9389815627743638e-05,
      "loss": 0.0016,
      "step": 2817
    },
    {
      "epoch": 0.8247000292654375,
      "grad_norm": 0.00013969867723062634,
      "learning_rate": 2.9382499268364062e-05,
      "loss": 0.0,
      "step": 2818
    },
    {
      "epoch": 0.8249926836406204,
      "grad_norm": 0.016234902665019035,
      "learning_rate": 2.937518290898449e-05,
      "loss": 0.0001,
      "step": 2819
    },
    {
      "epoch": 0.8252853380158033,
      "grad_norm": 0.0002993481757584959,
      "learning_rate": 2.9367866549604918e-05,
      "loss": 0.0,
      "step": 2820
    },
    {
      "epoch": 0.8255779923909863,
      "grad_norm": 0.00902794860303402,
      "learning_rate": 2.9360550190225346e-05,
      "loss": 0.0001,
      "step": 2821
    },
    {
      "epoch": 0.8258706467661692,
      "grad_norm": 0.0005412195459939539,
      "learning_rate": 2.935323383084577e-05,
      "loss": 0.0,
      "step": 2822
    },
    {
      "epoch": 0.8261633011413521,
      "grad_norm": 0.0006353114731609821,
      "learning_rate": 2.93459174714662e-05,
      "loss": 0.0,
      "step": 2823
    },
    {
      "epoch": 0.826455955516535,
      "grad_norm": 0.0005318338517099619,
      "learning_rate": 2.9338601112086626e-05,
      "loss": 0.0,
      "step": 2824
    },
    {
      "epoch": 0.8267486098917178,
      "grad_norm": 0.006010818760842085,
      "learning_rate": 2.9331284752707054e-05,
      "loss": 0.0,
      "step": 2825
    },
    {
      "epoch": 0.8270412642669008,
      "grad_norm": 0.014696326106786728,
      "learning_rate": 2.9323968393327482e-05,
      "loss": 0.0001,
      "step": 2826
    },
    {
      "epoch": 0.8273339186420837,
      "grad_norm": 0.02982524037361145,
      "learning_rate": 2.931665203394791e-05,
      "loss": 0.0001,
      "step": 2827
    },
    {
      "epoch": 0.8276265730172666,
      "grad_norm": 0.060100823640823364,
      "learning_rate": 2.9309335674568338e-05,
      "loss": 0.0004,
      "step": 2828
    },
    {
      "epoch": 0.8279192273924495,
      "grad_norm": 0.007390571292489767,
      "learning_rate": 2.9302019315188763e-05,
      "loss": 0.0,
      "step": 2829
    },
    {
      "epoch": 0.8282118817676324,
      "grad_norm": 0.0005158820422366261,
      "learning_rate": 2.9294702955809187e-05,
      "loss": 0.0,
      "step": 2830
    },
    {
      "epoch": 0.8285045361428154,
      "grad_norm": 0.43470558524131775,
      "learning_rate": 2.9287386596429615e-05,
      "loss": 0.0011,
      "step": 2831
    },
    {
      "epoch": 0.8287971905179983,
      "grad_norm": 14.537055969238281,
      "learning_rate": 2.9280070237050043e-05,
      "loss": 0.0477,
      "step": 2832
    },
    {
      "epoch": 0.8290898448931812,
      "grad_norm": 0.0005331973661668599,
      "learning_rate": 2.927275387767047e-05,
      "loss": 0.0,
      "step": 2833
    },
    {
      "epoch": 0.829382499268364,
      "grad_norm": 0.0004824897332582623,
      "learning_rate": 2.92654375182909e-05,
      "loss": 0.0,
      "step": 2834
    },
    {
      "epoch": 0.8296751536435469,
      "grad_norm": 4.611309051513672,
      "learning_rate": 2.9258121158911327e-05,
      "loss": 0.2556,
      "step": 2835
    },
    {
      "epoch": 0.8299678080187298,
      "grad_norm": 0.006274438463151455,
      "learning_rate": 2.9250804799531755e-05,
      "loss": 0.0,
      "step": 2836
    },
    {
      "epoch": 0.8302604623939128,
      "grad_norm": 0.00011106379679404199,
      "learning_rate": 2.9243488440152183e-05,
      "loss": 0.0,
      "step": 2837
    },
    {
      "epoch": 0.8305531167690957,
      "grad_norm": 0.10963346064090729,
      "learning_rate": 2.923617208077261e-05,
      "loss": 0.0005,
      "step": 2838
    },
    {
      "epoch": 0.8308457711442786,
      "grad_norm": 0.0020755550358444452,
      "learning_rate": 2.9228855721393032e-05,
      "loss": 0.0,
      "step": 2839
    },
    {
      "epoch": 0.8311384255194615,
      "grad_norm": 0.00033546079066582024,
      "learning_rate": 2.922153936201346e-05,
      "loss": 0.0,
      "step": 2840
    },
    {
      "epoch": 0.8314310798946444,
      "grad_norm": 0.0009139064350165427,
      "learning_rate": 2.9214223002633888e-05,
      "loss": 0.0,
      "step": 2841
    },
    {
      "epoch": 0.8317237342698274,
      "grad_norm": 0.1672952026128769,
      "learning_rate": 2.9206906643254316e-05,
      "loss": 0.0008,
      "step": 2842
    },
    {
      "epoch": 0.8320163886450103,
      "grad_norm": 2.1475164890289307,
      "learning_rate": 2.9199590283874744e-05,
      "loss": 0.0057,
      "step": 2843
    },
    {
      "epoch": 0.8323090430201932,
      "grad_norm": 0.0007393267005681992,
      "learning_rate": 2.9192273924495172e-05,
      "loss": 0.0,
      "step": 2844
    },
    {
      "epoch": 0.832601697395376,
      "grad_norm": 0.02002253197133541,
      "learning_rate": 2.91849575651156e-05,
      "loss": 0.0001,
      "step": 2845
    },
    {
      "epoch": 0.8328943517705589,
      "grad_norm": 0.02759724296629429,
      "learning_rate": 2.9177641205736028e-05,
      "loss": 0.0001,
      "step": 2846
    },
    {
      "epoch": 0.8331870061457419,
      "grad_norm": 0.004802546929568052,
      "learning_rate": 2.9170324846356456e-05,
      "loss": 0.0,
      "step": 2847
    },
    {
      "epoch": 0.8334796605209248,
      "grad_norm": 0.000317930564051494,
      "learning_rate": 2.9163008486976884e-05,
      "loss": 0.0,
      "step": 2848
    },
    {
      "epoch": 0.8337723148961077,
      "grad_norm": 0.0012358769308775663,
      "learning_rate": 2.915569212759731e-05,
      "loss": 0.0,
      "step": 2849
    },
    {
      "epoch": 0.8340649692712906,
      "grad_norm": 0.8042240738868713,
      "learning_rate": 2.9148375768217733e-05,
      "loss": 0.0028,
      "step": 2850
    },
    {
      "epoch": 0.8343576236464735,
      "grad_norm": 0.0007900151540525258,
      "learning_rate": 2.914105940883816e-05,
      "loss": 0.0,
      "step": 2851
    },
    {
      "epoch": 0.8346502780216565,
      "grad_norm": 0.0005253091221675277,
      "learning_rate": 2.913374304945859e-05,
      "loss": 0.0,
      "step": 2852
    },
    {
      "epoch": 0.8349429323968394,
      "grad_norm": 0.002171743893995881,
      "learning_rate": 2.9126426690079017e-05,
      "loss": 0.0,
      "step": 2853
    },
    {
      "epoch": 0.8352355867720223,
      "grad_norm": 0.0003047423670068383,
      "learning_rate": 2.9119110330699445e-05,
      "loss": 0.0,
      "step": 2854
    },
    {
      "epoch": 0.8355282411472051,
      "grad_norm": 0.0017876947531476617,
      "learning_rate": 2.9111793971319873e-05,
      "loss": 0.0,
      "step": 2855
    },
    {
      "epoch": 0.835820895522388,
      "grad_norm": 0.0004605422727763653,
      "learning_rate": 2.91044776119403e-05,
      "loss": 0.0,
      "step": 2856
    },
    {
      "epoch": 0.8361135498975709,
      "grad_norm": 0.0010576428612694144,
      "learning_rate": 2.909716125256073e-05,
      "loss": 0.0,
      "step": 2857
    },
    {
      "epoch": 0.8364062042727539,
      "grad_norm": 0.0007708283956162632,
      "learning_rate": 2.9089844893181156e-05,
      "loss": 0.0,
      "step": 2858
    },
    {
      "epoch": 0.8366988586479368,
      "grad_norm": 0.0009573538554832339,
      "learning_rate": 2.9082528533801584e-05,
      "loss": 0.0,
      "step": 2859
    },
    {
      "epoch": 0.8369915130231197,
      "grad_norm": 0.000981238903477788,
      "learning_rate": 2.9075212174422006e-05,
      "loss": 0.0,
      "step": 2860
    },
    {
      "epoch": 0.8372841673983026,
      "grad_norm": 1.0129269361495972,
      "learning_rate": 2.9067895815042434e-05,
      "loss": 0.0033,
      "step": 2861
    },
    {
      "epoch": 0.8375768217734855,
      "grad_norm": 0.00044342450564727187,
      "learning_rate": 2.906057945566286e-05,
      "loss": 0.0,
      "step": 2862
    },
    {
      "epoch": 0.8378694761486685,
      "grad_norm": 0.014190975576639175,
      "learning_rate": 2.905326309628329e-05,
      "loss": 0.0001,
      "step": 2863
    },
    {
      "epoch": 0.8381621305238514,
      "grad_norm": 7.357584476470947,
      "learning_rate": 2.9045946736903717e-05,
      "loss": 0.1928,
      "step": 2864
    },
    {
      "epoch": 0.8384547848990342,
      "grad_norm": 0.005698964465409517,
      "learning_rate": 2.9038630377524145e-05,
      "loss": 0.0001,
      "step": 2865
    },
    {
      "epoch": 0.8387474392742171,
      "grad_norm": 0.013709690421819687,
      "learning_rate": 2.9031314018144573e-05,
      "loss": 0.0001,
      "step": 2866
    },
    {
      "epoch": 0.8390400936494,
      "grad_norm": 0.008576486259698868,
      "learning_rate": 2.9023997658765e-05,
      "loss": 0.0001,
      "step": 2867
    },
    {
      "epoch": 0.839332748024583,
      "grad_norm": 0.001753227086737752,
      "learning_rate": 2.901668129938543e-05,
      "loss": 0.0,
      "step": 2868
    },
    {
      "epoch": 0.8396254023997659,
      "grad_norm": 11.56413459777832,
      "learning_rate": 2.9009364940005857e-05,
      "loss": 0.0517,
      "step": 2869
    },
    {
      "epoch": 0.8399180567749488,
      "grad_norm": 0.003031209111213684,
      "learning_rate": 2.9002048580626285e-05,
      "loss": 0.0,
      "step": 2870
    },
    {
      "epoch": 0.8402107111501317,
      "grad_norm": 16.875307083129883,
      "learning_rate": 2.8994732221246706e-05,
      "loss": 0.0994,
      "step": 2871
    },
    {
      "epoch": 0.8405033655253146,
      "grad_norm": 0.0014646684285253286,
      "learning_rate": 2.8987415861867134e-05,
      "loss": 0.0,
      "step": 2872
    },
    {
      "epoch": 0.8407960199004975,
      "grad_norm": 0.0011212496319785714,
      "learning_rate": 2.8980099502487562e-05,
      "loss": 0.0,
      "step": 2873
    },
    {
      "epoch": 0.8410886742756805,
      "grad_norm": 0.0007398988236673176,
      "learning_rate": 2.897278314310799e-05,
      "loss": 0.0,
      "step": 2874
    },
    {
      "epoch": 0.8413813286508633,
      "grad_norm": 0.000355487602064386,
      "learning_rate": 2.8965466783728418e-05,
      "loss": 0.0,
      "step": 2875
    },
    {
      "epoch": 0.8416739830260462,
      "grad_norm": 0.0025122477672994137,
      "learning_rate": 2.8958150424348846e-05,
      "loss": 0.0,
      "step": 2876
    },
    {
      "epoch": 0.8419666374012291,
      "grad_norm": 0.0020755138248205185,
      "learning_rate": 2.8950834064969274e-05,
      "loss": 0.0,
      "step": 2877
    },
    {
      "epoch": 0.842259291776412,
      "grad_norm": 10.628597259521484,
      "learning_rate": 2.8943517705589702e-05,
      "loss": 0.1175,
      "step": 2878
    },
    {
      "epoch": 0.842551946151595,
      "grad_norm": 0.3478909730911255,
      "learning_rate": 2.893620134621013e-05,
      "loss": 0.0008,
      "step": 2879
    },
    {
      "epoch": 0.8428446005267779,
      "grad_norm": 0.0006228281417861581,
      "learning_rate": 2.8928884986830558e-05,
      "loss": 0.0,
      "step": 2880
    },
    {
      "epoch": 0.8431372549019608,
      "grad_norm": 0.009975981898605824,
      "learning_rate": 2.8921568627450986e-05,
      "loss": 0.0001,
      "step": 2881
    },
    {
      "epoch": 0.8434299092771437,
      "grad_norm": 0.00953388586640358,
      "learning_rate": 2.8914252268071407e-05,
      "loss": 0.0001,
      "step": 2882
    },
    {
      "epoch": 0.8437225636523266,
      "grad_norm": 11.161602020263672,
      "learning_rate": 2.8906935908691835e-05,
      "loss": 0.1049,
      "step": 2883
    },
    {
      "epoch": 0.8440152180275096,
      "grad_norm": 0.001079063513316214,
      "learning_rate": 2.8899619549312263e-05,
      "loss": 0.0,
      "step": 2884
    },
    {
      "epoch": 0.8443078724026924,
      "grad_norm": 0.0013235245132818818,
      "learning_rate": 2.889230318993269e-05,
      "loss": 0.0,
      "step": 2885
    },
    {
      "epoch": 0.8446005267778753,
      "grad_norm": 0.0013966573169454932,
      "learning_rate": 2.888498683055312e-05,
      "loss": 0.0,
      "step": 2886
    },
    {
      "epoch": 0.8448931811530582,
      "grad_norm": 0.001036423142068088,
      "learning_rate": 2.8877670471173547e-05,
      "loss": 0.0,
      "step": 2887
    },
    {
      "epoch": 0.8451858355282411,
      "grad_norm": 3.4299228191375732,
      "learning_rate": 2.8870354111793975e-05,
      "loss": 0.0154,
      "step": 2888
    },
    {
      "epoch": 0.845478489903424,
      "grad_norm": 0.004335775505751371,
      "learning_rate": 2.8863037752414403e-05,
      "loss": 0.0001,
      "step": 2889
    },
    {
      "epoch": 0.845771144278607,
      "grad_norm": 0.0013231680495664477,
      "learning_rate": 2.885572139303483e-05,
      "loss": 0.0,
      "step": 2890
    },
    {
      "epoch": 0.8460637986537899,
      "grad_norm": 6.0724663734436035,
      "learning_rate": 2.884840503365526e-05,
      "loss": 0.0914,
      "step": 2891
    },
    {
      "epoch": 0.8463564530289728,
      "grad_norm": 0.01388559490442276,
      "learning_rate": 2.884108867427568e-05,
      "loss": 0.0002,
      "step": 2892
    },
    {
      "epoch": 0.8466491074041557,
      "grad_norm": 0.0005023994017392397,
      "learning_rate": 2.8833772314896108e-05,
      "loss": 0.0,
      "step": 2893
    },
    {
      "epoch": 0.8469417617793386,
      "grad_norm": 0.0015240572392940521,
      "learning_rate": 2.8826455955516536e-05,
      "loss": 0.0,
      "step": 2894
    },
    {
      "epoch": 0.8472344161545216,
      "grad_norm": 0.021614955738186836,
      "learning_rate": 2.8819139596136964e-05,
      "loss": 0.0002,
      "step": 2895
    },
    {
      "epoch": 0.8475270705297044,
      "grad_norm": 0.006151202600449324,
      "learning_rate": 2.881182323675739e-05,
      "loss": 0.0001,
      "step": 2896
    },
    {
      "epoch": 0.8478197249048873,
      "grad_norm": 0.0035713522229343653,
      "learning_rate": 2.880450687737782e-05,
      "loss": 0.0001,
      "step": 2897
    },
    {
      "epoch": 0.8481123792800702,
      "grad_norm": 0.0021826657466590405,
      "learning_rate": 2.8797190517998247e-05,
      "loss": 0.0,
      "step": 2898
    },
    {
      "epoch": 0.8484050336552531,
      "grad_norm": 0.0003361078561283648,
      "learning_rate": 2.8789874158618675e-05,
      "loss": 0.0,
      "step": 2899
    },
    {
      "epoch": 0.8486976880304361,
      "grad_norm": 0.014857086353003979,
      "learning_rate": 2.87825577992391e-05,
      "loss": 0.0001,
      "step": 2900
    },
    {
      "epoch": 0.848990342405619,
      "grad_norm": 0.00031917868182063103,
      "learning_rate": 2.8775241439859528e-05,
      "loss": 0.0,
      "step": 2901
    },
    {
      "epoch": 0.8492829967808019,
      "grad_norm": 15.271645545959473,
      "learning_rate": 2.8767925080479956e-05,
      "loss": 0.1044,
      "step": 2902
    },
    {
      "epoch": 0.8495756511559848,
      "grad_norm": 0.0025875659193843603,
      "learning_rate": 2.876060872110038e-05,
      "loss": 0.0,
      "step": 2903
    },
    {
      "epoch": 0.8498683055311677,
      "grad_norm": 0.004049929790198803,
      "learning_rate": 2.8753292361720808e-05,
      "loss": 0.0001,
      "step": 2904
    },
    {
      "epoch": 0.8501609599063507,
      "grad_norm": 0.10546530038118362,
      "learning_rate": 2.8745976002341236e-05,
      "loss": 0.0007,
      "step": 2905
    },
    {
      "epoch": 0.8504536142815335,
      "grad_norm": 0.0020989186596125364,
      "learning_rate": 2.8738659642961664e-05,
      "loss": 0.0,
      "step": 2906
    },
    {
      "epoch": 0.8507462686567164,
      "grad_norm": 0.026156364008784294,
      "learning_rate": 2.8731343283582092e-05,
      "loss": 0.0003,
      "step": 2907
    },
    {
      "epoch": 0.8510389230318993,
      "grad_norm": 0.0009036893025040627,
      "learning_rate": 2.8724026924202517e-05,
      "loss": 0.0,
      "step": 2908
    },
    {
      "epoch": 0.8513315774070822,
      "grad_norm": 5.80036735534668,
      "learning_rate": 2.8716710564822945e-05,
      "loss": 0.0665,
      "step": 2909
    },
    {
      "epoch": 0.8516242317822651,
      "grad_norm": 0.0036561263259500265,
      "learning_rate": 2.8709394205443373e-05,
      "loss": 0.0001,
      "step": 2910
    },
    {
      "epoch": 0.8519168861574481,
      "grad_norm": 0.0010862121125683188,
      "learning_rate": 2.87020778460638e-05,
      "loss": 0.0,
      "step": 2911
    },
    {
      "epoch": 0.852209540532631,
      "grad_norm": 0.0026491752360016108,
      "learning_rate": 2.869476148668423e-05,
      "loss": 0.0001,
      "step": 2912
    },
    {
      "epoch": 0.8525021949078139,
      "grad_norm": 0.0032964395359158516,
      "learning_rate": 2.8687445127304653e-05,
      "loss": 0.0001,
      "step": 2913
    },
    {
      "epoch": 0.8527948492829968,
      "grad_norm": 0.008203510195016861,
      "learning_rate": 2.868012876792508e-05,
      "loss": 0.0001,
      "step": 2914
    },
    {
      "epoch": 0.8530875036581796,
      "grad_norm": 0.00029596927925013006,
      "learning_rate": 2.867281240854551e-05,
      "loss": 0.0,
      "step": 2915
    },
    {
      "epoch": 0.8533801580333626,
      "grad_norm": 1.0822234153747559,
      "learning_rate": 2.8665496049165937e-05,
      "loss": 0.0047,
      "step": 2916
    },
    {
      "epoch": 0.8536728124085455,
      "grad_norm": 0.003979158587753773,
      "learning_rate": 2.865817968978636e-05,
      "loss": 0.0001,
      "step": 2917
    },
    {
      "epoch": 0.8539654667837284,
      "grad_norm": 0.0018156712176278234,
      "learning_rate": 2.865086333040679e-05,
      "loss": 0.0,
      "step": 2918
    },
    {
      "epoch": 0.8542581211589113,
      "grad_norm": 0.006645577028393745,
      "learning_rate": 2.8643546971027217e-05,
      "loss": 0.0001,
      "step": 2919
    },
    {
      "epoch": 0.8545507755340942,
      "grad_norm": 0.0036059929989278316,
      "learning_rate": 2.8636230611647645e-05,
      "loss": 0.0001,
      "step": 2920
    },
    {
      "epoch": 0.8548434299092772,
      "grad_norm": 0.0005414785700850189,
      "learning_rate": 2.8628914252268073e-05,
      "loss": 0.0,
      "step": 2921
    },
    {
      "epoch": 0.8551360842844601,
      "grad_norm": 0.00039202620973810554,
      "learning_rate": 2.86215978928885e-05,
      "loss": 0.0,
      "step": 2922
    },
    {
      "epoch": 0.855428738659643,
      "grad_norm": 0.0024136367719620466,
      "learning_rate": 2.861428153350893e-05,
      "loss": 0.0,
      "step": 2923
    },
    {
      "epoch": 0.8557213930348259,
      "grad_norm": 0.0005258520250208676,
      "learning_rate": 2.8606965174129354e-05,
      "loss": 0.0,
      "step": 2924
    },
    {
      "epoch": 0.8560140474100087,
      "grad_norm": 0.00053996971109882,
      "learning_rate": 2.859964881474978e-05,
      "loss": 0.0,
      "step": 2925
    },
    {
      "epoch": 0.8563067017851916,
      "grad_norm": 0.005987247452139854,
      "learning_rate": 2.8592332455370206e-05,
      "loss": 0.0001,
      "step": 2926
    },
    {
      "epoch": 0.8565993561603746,
      "grad_norm": 0.0031112045980989933,
      "learning_rate": 2.8585016095990634e-05,
      "loss": 0.0001,
      "step": 2927
    },
    {
      "epoch": 0.8568920105355575,
      "grad_norm": 0.0008803472737781703,
      "learning_rate": 2.8577699736611062e-05,
      "loss": 0.0,
      "step": 2928
    },
    {
      "epoch": 0.8571846649107404,
      "grad_norm": 0.0006713624461553991,
      "learning_rate": 2.857038337723149e-05,
      "loss": 0.0,
      "step": 2929
    },
    {
      "epoch": 0.8574773192859233,
      "grad_norm": 0.0008445119019597769,
      "learning_rate": 2.8563067017851918e-05,
      "loss": 0.0,
      "step": 2930
    },
    {
      "epoch": 0.8577699736611062,
      "grad_norm": 0.0017099477117881179,
      "learning_rate": 2.8555750658472346e-05,
      "loss": 0.0,
      "step": 2931
    },
    {
      "epoch": 0.8580626280362892,
      "grad_norm": 0.0013064577942714095,
      "learning_rate": 2.8548434299092774e-05,
      "loss": 0.0,
      "step": 2932
    },
    {
      "epoch": 0.8583552824114721,
      "grad_norm": 0.05210070684552193,
      "learning_rate": 2.8541117939713202e-05,
      "loss": 0.0003,
      "step": 2933
    },
    {
      "epoch": 0.858647936786655,
      "grad_norm": 0.0011810348369181156,
      "learning_rate": 2.853380158033363e-05,
      "loss": 0.0,
      "step": 2934
    },
    {
      "epoch": 0.8589405911618379,
      "grad_norm": 0.016888394951820374,
      "learning_rate": 2.852648522095405e-05,
      "loss": 0.0001,
      "step": 2935
    },
    {
      "epoch": 0.8592332455370207,
      "grad_norm": 3.309821128845215,
      "learning_rate": 2.851916886157448e-05,
      "loss": 0.0082,
      "step": 2936
    },
    {
      "epoch": 0.8595258999122037,
      "grad_norm": 0.004678688012063503,
      "learning_rate": 2.8511852502194907e-05,
      "loss": 0.0001,
      "step": 2937
    },
    {
      "epoch": 0.8598185542873866,
      "grad_norm": 0.0011509255273267627,
      "learning_rate": 2.8504536142815335e-05,
      "loss": 0.0,
      "step": 2938
    },
    {
      "epoch": 0.8601112086625695,
      "grad_norm": 0.0011290708789601922,
      "learning_rate": 2.8497219783435763e-05,
      "loss": 0.0,
      "step": 2939
    },
    {
      "epoch": 0.8604038630377524,
      "grad_norm": 0.0021077506244182587,
      "learning_rate": 2.848990342405619e-05,
      "loss": 0.0,
      "step": 2940
    },
    {
      "epoch": 0.8606965174129353,
      "grad_norm": 0.0008194958209060133,
      "learning_rate": 2.848258706467662e-05,
      "loss": 0.0,
      "step": 2941
    },
    {
      "epoch": 0.8609891717881182,
      "grad_norm": 0.00027053439407609403,
      "learning_rate": 2.8475270705297047e-05,
      "loss": 0.0,
      "step": 2942
    },
    {
      "epoch": 0.8612818261633012,
      "grad_norm": 0.002523620380088687,
      "learning_rate": 2.8467954345917475e-05,
      "loss": 0.0,
      "step": 2943
    },
    {
      "epoch": 0.8615744805384841,
      "grad_norm": 0.00047940504737198353,
      "learning_rate": 2.8460637986537903e-05,
      "loss": 0.0,
      "step": 2944
    },
    {
      "epoch": 0.861867134913667,
      "grad_norm": 0.0005567455664277077,
      "learning_rate": 2.8453321627158324e-05,
      "loss": 0.0,
      "step": 2945
    },
    {
      "epoch": 0.8621597892888498,
      "grad_norm": 0.0006807685131207108,
      "learning_rate": 2.8446005267778752e-05,
      "loss": 0.0,
      "step": 2946
    },
    {
      "epoch": 0.8624524436640327,
      "grad_norm": 10.231255531311035,
      "learning_rate": 2.843868890839918e-05,
      "loss": 0.0348,
      "step": 2947
    },
    {
      "epoch": 0.8627450980392157,
      "grad_norm": 0.0005288583342917264,
      "learning_rate": 2.8431372549019608e-05,
      "loss": 0.0,
      "step": 2948
    },
    {
      "epoch": 0.8630377524143986,
      "grad_norm": 13.133936882019043,
      "learning_rate": 2.8424056189640036e-05,
      "loss": 0.0892,
      "step": 2949
    },
    {
      "epoch": 0.8633304067895815,
      "grad_norm": 0.2714103162288666,
      "learning_rate": 2.8416739830260464e-05,
      "loss": 0.0011,
      "step": 2950
    },
    {
      "epoch": 0.8636230611647644,
      "grad_norm": 0.0431370884180069,
      "learning_rate": 2.840942347088089e-05,
      "loss": 0.0003,
      "step": 2951
    },
    {
      "epoch": 0.8639157155399473,
      "grad_norm": 0.011839745566248894,
      "learning_rate": 2.840210711150132e-05,
      "loss": 0.0001,
      "step": 2952
    },
    {
      "epoch": 0.8642083699151303,
      "grad_norm": 0.0012024779571220279,
      "learning_rate": 2.8394790752121747e-05,
      "loss": 0.0,
      "step": 2953
    },
    {
      "epoch": 0.8645010242903132,
      "grad_norm": 0.0008249465608969331,
      "learning_rate": 2.8387474392742175e-05,
      "loss": 0.0,
      "step": 2954
    },
    {
      "epoch": 0.8647936786654961,
      "grad_norm": 5.485530376434326,
      "learning_rate": 2.8380158033362603e-05,
      "loss": 0.15,
      "step": 2955
    },
    {
      "epoch": 0.865086333040679,
      "grad_norm": 0.0009516372811049223,
      "learning_rate": 2.8372841673983024e-05,
      "loss": 0.0,
      "step": 2956
    },
    {
      "epoch": 0.8653789874158618,
      "grad_norm": 0.004190864972770214,
      "learning_rate": 2.8365525314603452e-05,
      "loss": 0.0001,
      "step": 2957
    },
    {
      "epoch": 0.8656716417910447,
      "grad_norm": 0.0011981577845290303,
      "learning_rate": 2.835820895522388e-05,
      "loss": 0.0,
      "step": 2958
    },
    {
      "epoch": 0.8659642961662277,
      "grad_norm": 0.011278665624558926,
      "learning_rate": 2.8350892595844308e-05,
      "loss": 0.0001,
      "step": 2959
    },
    {
      "epoch": 0.8662569505414106,
      "grad_norm": 0.0007399932947009802,
      "learning_rate": 2.8343576236464736e-05,
      "loss": 0.0,
      "step": 2960
    },
    {
      "epoch": 0.8665496049165935,
      "grad_norm": 0.0057645561173558235,
      "learning_rate": 2.8336259877085164e-05,
      "loss": 0.0001,
      "step": 2961
    },
    {
      "epoch": 0.8668422592917764,
      "grad_norm": 0.027825020253658295,
      "learning_rate": 2.8328943517705592e-05,
      "loss": 0.0002,
      "step": 2962
    },
    {
      "epoch": 0.8671349136669593,
      "grad_norm": 0.03514590486884117,
      "learning_rate": 2.832162715832602e-05,
      "loss": 0.0001,
      "step": 2963
    },
    {
      "epoch": 0.8674275680421423,
      "grad_norm": 0.03552441671490669,
      "learning_rate": 2.8314310798946448e-05,
      "loss": 0.0002,
      "step": 2964
    },
    {
      "epoch": 0.8677202224173252,
      "grad_norm": 0.005665469449013472,
      "learning_rate": 2.8306994439566876e-05,
      "loss": 0.0001,
      "step": 2965
    },
    {
      "epoch": 0.868012876792508,
      "grad_norm": 0.4229358434677124,
      "learning_rate": 2.8299678080187297e-05,
      "loss": 0.0016,
      "step": 2966
    },
    {
      "epoch": 0.8683055311676909,
      "grad_norm": 0.3137822449207306,
      "learning_rate": 2.8292361720807725e-05,
      "loss": 0.0018,
      "step": 2967
    },
    {
      "epoch": 0.8685981855428738,
      "grad_norm": 0.002080155536532402,
      "learning_rate": 2.8285045361428153e-05,
      "loss": 0.0001,
      "step": 2968
    },
    {
      "epoch": 0.8688908399180568,
      "grad_norm": 0.0007805654313415289,
      "learning_rate": 2.827772900204858e-05,
      "loss": 0.0,
      "step": 2969
    },
    {
      "epoch": 0.8691834942932397,
      "grad_norm": 0.002341997576877475,
      "learning_rate": 2.827041264266901e-05,
      "loss": 0.0001,
      "step": 2970
    },
    {
      "epoch": 0.8694761486684226,
      "grad_norm": 0.00467673409730196,
      "learning_rate": 2.8263096283289437e-05,
      "loss": 0.0,
      "step": 2971
    },
    {
      "epoch": 0.8697688030436055,
      "grad_norm": 0.0031030906829982996,
      "learning_rate": 2.8255779923909865e-05,
      "loss": 0.0001,
      "step": 2972
    },
    {
      "epoch": 0.8700614574187884,
      "grad_norm": 0.0004940580110996962,
      "learning_rate": 2.8248463564530293e-05,
      "loss": 0.0,
      "step": 2973
    },
    {
      "epoch": 0.8703541117939714,
      "grad_norm": 0.0012688792776316404,
      "learning_rate": 2.824114720515072e-05,
      "loss": 0.0,
      "step": 2974
    },
    {
      "epoch": 0.8706467661691543,
      "grad_norm": 0.03329317644238472,
      "learning_rate": 2.823383084577115e-05,
      "loss": 0.0004,
      "step": 2975
    },
    {
      "epoch": 0.8709394205443372,
      "grad_norm": 0.05283841863274574,
      "learning_rate": 2.8226514486391577e-05,
      "loss": 0.0003,
      "step": 2976
    },
    {
      "epoch": 0.87123207491952,
      "grad_norm": 3.264716863632202,
      "learning_rate": 2.8219198127011998e-05,
      "loss": 0.0067,
      "step": 2977
    },
    {
      "epoch": 0.8715247292947029,
      "grad_norm": 0.009444973431527615,
      "learning_rate": 2.8211881767632426e-05,
      "loss": 0.0001,
      "step": 2978
    },
    {
      "epoch": 0.8718173836698858,
      "grad_norm": 0.0039008858148008585,
      "learning_rate": 2.8204565408252854e-05,
      "loss": 0.0001,
      "step": 2979
    },
    {
      "epoch": 0.8721100380450688,
      "grad_norm": 0.0014422088861465454,
      "learning_rate": 2.819724904887328e-05,
      "loss": 0.0,
      "step": 2980
    },
    {
      "epoch": 0.8724026924202517,
      "grad_norm": 0.006031569559127092,
      "learning_rate": 2.818993268949371e-05,
      "loss": 0.0001,
      "step": 2981
    },
    {
      "epoch": 0.8726953467954346,
      "grad_norm": 0.0257797259837389,
      "learning_rate": 2.8182616330114138e-05,
      "loss": 0.0003,
      "step": 2982
    },
    {
      "epoch": 0.8729880011706175,
      "grad_norm": 0.02930227294564247,
      "learning_rate": 2.8175299970734566e-05,
      "loss": 0.0003,
      "step": 2983
    },
    {
      "epoch": 0.8732806555458004,
      "grad_norm": 0.021732913330197334,
      "learning_rate": 2.8167983611354993e-05,
      "loss": 0.0002,
      "step": 2984
    },
    {
      "epoch": 0.8735733099209834,
      "grad_norm": 0.005003643687814474,
      "learning_rate": 2.816066725197542e-05,
      "loss": 0.0001,
      "step": 2985
    },
    {
      "epoch": 0.8738659642961663,
      "grad_norm": 0.014906673692166805,
      "learning_rate": 2.815335089259585e-05,
      "loss": 0.0002,
      "step": 2986
    },
    {
      "epoch": 0.8741586186713491,
      "grad_norm": 0.003577263094484806,
      "learning_rate": 2.8146034533216274e-05,
      "loss": 0.0001,
      "step": 2987
    },
    {
      "epoch": 0.874451273046532,
      "grad_norm": 0.6646336317062378,
      "learning_rate": 2.81387181738367e-05,
      "loss": 0.003,
      "step": 2988
    },
    {
      "epoch": 0.8747439274217149,
      "grad_norm": 0.01488159317523241,
      "learning_rate": 2.8131401814457126e-05,
      "loss": 0.0001,
      "step": 2989
    },
    {
      "epoch": 0.8750365817968979,
      "grad_norm": 6.319203853607178,
      "learning_rate": 2.8124085455077554e-05,
      "loss": 0.2414,
      "step": 2990
    },
    {
      "epoch": 0.8753292361720808,
      "grad_norm": 0.0002458333328831941,
      "learning_rate": 2.8116769095697982e-05,
      "loss": 0.0,
      "step": 2991
    },
    {
      "epoch": 0.8756218905472637,
      "grad_norm": 0.0015180050395429134,
      "learning_rate": 2.810945273631841e-05,
      "loss": 0.0,
      "step": 2992
    },
    {
      "epoch": 0.8759145449224466,
      "grad_norm": 0.00021006070892326534,
      "learning_rate": 2.8102136376938838e-05,
      "loss": 0.0,
      "step": 2993
    },
    {
      "epoch": 0.8762071992976295,
      "grad_norm": 0.00108836661092937,
      "learning_rate": 2.8094820017559266e-05,
      "loss": 0.0,
      "step": 2994
    },
    {
      "epoch": 0.8764998536728124,
      "grad_norm": 0.000900108425412327,
      "learning_rate": 2.808750365817969e-05,
      "loss": 0.0,
      "step": 2995
    },
    {
      "epoch": 0.8767925080479954,
      "grad_norm": 0.0017437812639400363,
      "learning_rate": 2.808018729880012e-05,
      "loss": 0.0,
      "step": 2996
    },
    {
      "epoch": 0.8770851624231782,
      "grad_norm": 0.03165074437856674,
      "learning_rate": 2.8072870939420547e-05,
      "loss": 0.0002,
      "step": 2997
    },
    {
      "epoch": 0.8773778167983611,
      "grad_norm": 0.0017918091034516692,
      "learning_rate": 2.806555458004097e-05,
      "loss": 0.0,
      "step": 2998
    },
    {
      "epoch": 0.877670471173544,
      "grad_norm": 0.03214191272854805,
      "learning_rate": 2.80582382206614e-05,
      "loss": 0.0002,
      "step": 2999
    },
    {
      "epoch": 0.8779631255487269,
      "grad_norm": 0.001971530495211482,
      "learning_rate": 2.8050921861281827e-05,
      "loss": 0.0,
      "step": 3000
    },
    {
      "epoch": 0.8782557799239099,
      "grad_norm": 7.419076919555664,
      "learning_rate": 2.8043605501902255e-05,
      "loss": 0.0408,
      "step": 3001
    },
    {
      "epoch": 0.8785484342990928,
      "grad_norm": 0.18764744699001312,
      "learning_rate": 2.8036289142522683e-05,
      "loss": 0.0012,
      "step": 3002
    },
    {
      "epoch": 0.8788410886742757,
      "grad_norm": 0.03457354009151459,
      "learning_rate": 2.8028972783143108e-05,
      "loss": 0.0003,
      "step": 3003
    },
    {
      "epoch": 0.8791337430494586,
      "grad_norm": 0.0014142461586743593,
      "learning_rate": 2.8021656423763536e-05,
      "loss": 0.0,
      "step": 3004
    },
    {
      "epoch": 0.8794263974246415,
      "grad_norm": 0.021508987993001938,
      "learning_rate": 2.8014340064383964e-05,
      "loss": 0.0001,
      "step": 3005
    },
    {
      "epoch": 0.8797190517998245,
      "grad_norm": 0.00040742603596299887,
      "learning_rate": 2.800702370500439e-05,
      "loss": 0.0,
      "step": 3006
    },
    {
      "epoch": 0.8800117061750073,
      "grad_norm": 0.001650528167374432,
      "learning_rate": 2.799970734562482e-05,
      "loss": 0.0,
      "step": 3007
    },
    {
      "epoch": 0.8803043605501902,
      "grad_norm": 0.002789534628391266,
      "learning_rate": 2.7992390986245247e-05,
      "loss": 0.0,
      "step": 3008
    },
    {
      "epoch": 0.8805970149253731,
      "grad_norm": 0.0006244821124710143,
      "learning_rate": 2.7985074626865672e-05,
      "loss": 0.0,
      "step": 3009
    },
    {
      "epoch": 0.880889669300556,
      "grad_norm": 0.003858131356537342,
      "learning_rate": 2.79777582674861e-05,
      "loss": 0.0001,
      "step": 3010
    },
    {
      "epoch": 0.8811823236757389,
      "grad_norm": 0.0015467230696231127,
      "learning_rate": 2.7970441908106524e-05,
      "loss": 0.0,
      "step": 3011
    },
    {
      "epoch": 0.8814749780509219,
      "grad_norm": 0.0018741864478215575,
      "learning_rate": 2.7963125548726952e-05,
      "loss": 0.0,
      "step": 3012
    },
    {
      "epoch": 0.8817676324261048,
      "grad_norm": 0.0017647253116592765,
      "learning_rate": 2.795580918934738e-05,
      "loss": 0.0,
      "step": 3013
    },
    {
      "epoch": 0.8820602868012877,
      "grad_norm": 0.0015287159476429224,
      "learning_rate": 2.7948492829967808e-05,
      "loss": 0.0,
      "step": 3014
    },
    {
      "epoch": 0.8823529411764706,
      "grad_norm": 0.0004398477612994611,
      "learning_rate": 2.7941176470588236e-05,
      "loss": 0.0,
      "step": 3015
    },
    {
      "epoch": 0.8826455955516535,
      "grad_norm": 0.0002965580206364393,
      "learning_rate": 2.7933860111208664e-05,
      "loss": 0.0,
      "step": 3016
    },
    {
      "epoch": 0.8829382499268364,
      "grad_norm": 0.0009375095833092928,
      "learning_rate": 2.7926543751829092e-05,
      "loss": 0.0,
      "step": 3017
    },
    {
      "epoch": 0.8832309043020193,
      "grad_norm": 0.0011114409426227212,
      "learning_rate": 2.791922739244952e-05,
      "loss": 0.0,
      "step": 3018
    },
    {
      "epoch": 0.8835235586772022,
      "grad_norm": 0.000699687167070806,
      "learning_rate": 2.7911911033069948e-05,
      "loss": 0.0,
      "step": 3019
    },
    {
      "epoch": 0.8838162130523851,
      "grad_norm": 0.001565412851050496,
      "learning_rate": 2.790459467369037e-05,
      "loss": 0.0,
      "step": 3020
    },
    {
      "epoch": 0.884108867427568,
      "grad_norm": 0.02848893404006958,
      "learning_rate": 2.7897278314310797e-05,
      "loss": 0.0002,
      "step": 3021
    },
    {
      "epoch": 0.884401521802751,
      "grad_norm": 0.0034479028545320034,
      "learning_rate": 2.7889961954931225e-05,
      "loss": 0.0001,
      "step": 3022
    },
    {
      "epoch": 0.8846941761779339,
      "grad_norm": 0.0010538590140640736,
      "learning_rate": 2.7882645595551653e-05,
      "loss": 0.0,
      "step": 3023
    },
    {
      "epoch": 0.8849868305531168,
      "grad_norm": 0.0004554866754915565,
      "learning_rate": 2.787532923617208e-05,
      "loss": 0.0,
      "step": 3024
    },
    {
      "epoch": 0.8852794849282997,
      "grad_norm": 0.010468067601323128,
      "learning_rate": 2.786801287679251e-05,
      "loss": 0.0001,
      "step": 3025
    },
    {
      "epoch": 0.8855721393034826,
      "grad_norm": 0.0002714238653425127,
      "learning_rate": 2.7860696517412937e-05,
      "loss": 0.0,
      "step": 3026
    },
    {
      "epoch": 0.8858647936786654,
      "grad_norm": 0.006320650223642588,
      "learning_rate": 2.7853380158033365e-05,
      "loss": 0.0001,
      "step": 3027
    },
    {
      "epoch": 0.8861574480538484,
      "grad_norm": 0.010992786847054958,
      "learning_rate": 2.7846063798653793e-05,
      "loss": 0.0001,
      "step": 3028
    },
    {
      "epoch": 0.8864501024290313,
      "grad_norm": 0.0016546098049730062,
      "learning_rate": 2.783874743927422e-05,
      "loss": 0.0,
      "step": 3029
    },
    {
      "epoch": 0.8867427568042142,
      "grad_norm": 0.002650155918672681,
      "learning_rate": 2.7831431079894642e-05,
      "loss": 0.0001,
      "step": 3030
    },
    {
      "epoch": 0.8870354111793971,
      "grad_norm": 0.0011368491686880589,
      "learning_rate": 2.782411472051507e-05,
      "loss": 0.0,
      "step": 3031
    },
    {
      "epoch": 0.88732806555458,
      "grad_norm": 0.0006310976459644735,
      "learning_rate": 2.7816798361135498e-05,
      "loss": 0.0,
      "step": 3032
    },
    {
      "epoch": 0.887620719929763,
      "grad_norm": 0.009759177453815937,
      "learning_rate": 2.7809482001755926e-05,
      "loss": 0.0001,
      "step": 3033
    },
    {
      "epoch": 0.8879133743049459,
      "grad_norm": 0.7568905353546143,
      "learning_rate": 2.7802165642376354e-05,
      "loss": 0.0035,
      "step": 3034
    },
    {
      "epoch": 0.8882060286801288,
      "grad_norm": 0.0009570059482939541,
      "learning_rate": 2.779484928299678e-05,
      "loss": 0.0,
      "step": 3035
    },
    {
      "epoch": 0.8884986830553117,
      "grad_norm": 0.002224736148491502,
      "learning_rate": 2.778753292361721e-05,
      "loss": 0.0,
      "step": 3036
    },
    {
      "epoch": 0.8887913374304945,
      "grad_norm": 0.018412360921502113,
      "learning_rate": 2.7780216564237638e-05,
      "loss": 0.0001,
      "step": 3037
    },
    {
      "epoch": 0.8890839918056775,
      "grad_norm": 0.0011815952602773905,
      "learning_rate": 2.7772900204858066e-05,
      "loss": 0.0,
      "step": 3038
    },
    {
      "epoch": 0.8893766461808604,
      "grad_norm": 0.0011148378252983093,
      "learning_rate": 2.7765583845478493e-05,
      "loss": 0.0,
      "step": 3039
    },
    {
      "epoch": 0.8896693005560433,
      "grad_norm": 0.0016473204595968127,
      "learning_rate": 2.775826748609892e-05,
      "loss": 0.0,
      "step": 3040
    },
    {
      "epoch": 0.8899619549312262,
      "grad_norm": 0.002104657469317317,
      "learning_rate": 2.7750951126719343e-05,
      "loss": 0.0,
      "step": 3041
    },
    {
      "epoch": 0.8902546093064091,
      "grad_norm": 0.0009894543327391148,
      "learning_rate": 2.774363476733977e-05,
      "loss": 0.0,
      "step": 3042
    },
    {
      "epoch": 0.8905472636815921,
      "grad_norm": 0.0005898270173929632,
      "learning_rate": 2.77363184079602e-05,
      "loss": 0.0,
      "step": 3043
    },
    {
      "epoch": 0.890839918056775,
      "grad_norm": 0.0008705668733455241,
      "learning_rate": 2.7729002048580626e-05,
      "loss": 0.0,
      "step": 3044
    },
    {
      "epoch": 0.8911325724319579,
      "grad_norm": 0.0008987328037619591,
      "learning_rate": 2.7721685689201054e-05,
      "loss": 0.0,
      "step": 3045
    },
    {
      "epoch": 0.8914252268071408,
      "grad_norm": 0.02109546959400177,
      "learning_rate": 2.7714369329821482e-05,
      "loss": 0.0002,
      "step": 3046
    },
    {
      "epoch": 0.8917178811823236,
      "grad_norm": 0.0006454390822909772,
      "learning_rate": 2.770705297044191e-05,
      "loss": 0.0,
      "step": 3047
    },
    {
      "epoch": 0.8920105355575065,
      "grad_norm": 0.0006983923376537859,
      "learning_rate": 2.7699736611062338e-05,
      "loss": 0.0,
      "step": 3048
    },
    {
      "epoch": 0.8923031899326895,
      "grad_norm": 0.001889776554889977,
      "learning_rate": 2.7692420251682766e-05,
      "loss": 0.0,
      "step": 3049
    },
    {
      "epoch": 0.8925958443078724,
      "grad_norm": 2.0714917182922363,
      "learning_rate": 2.7685103892303194e-05,
      "loss": 0.0056,
      "step": 3050
    },
    {
      "epoch": 0.8928884986830553,
      "grad_norm": 0.03971258923411369,
      "learning_rate": 2.7677787532923615e-05,
      "loss": 0.0003,
      "step": 3051
    },
    {
      "epoch": 0.8931811530582382,
      "grad_norm": 0.0026012498419731855,
      "learning_rate": 2.7670471173544043e-05,
      "loss": 0.0,
      "step": 3052
    },
    {
      "epoch": 0.8934738074334211,
      "grad_norm": 0.019663626328110695,
      "learning_rate": 2.766315481416447e-05,
      "loss": 0.0002,
      "step": 3053
    },
    {
      "epoch": 0.8937664618086041,
      "grad_norm": 0.0005580433062277734,
      "learning_rate": 2.76558384547849e-05,
      "loss": 0.0,
      "step": 3054
    },
    {
      "epoch": 0.894059116183787,
      "grad_norm": 0.0014611040242016315,
      "learning_rate": 2.7648522095405327e-05,
      "loss": 0.0,
      "step": 3055
    },
    {
      "epoch": 0.8943517705589699,
      "grad_norm": 0.0009221715736202896,
      "learning_rate": 2.7641205736025755e-05,
      "loss": 0.0,
      "step": 3056
    },
    {
      "epoch": 0.8946444249341527,
      "grad_norm": 0.00030951714143157005,
      "learning_rate": 2.7633889376646183e-05,
      "loss": 0.0,
      "step": 3057
    },
    {
      "epoch": 0.8949370793093356,
      "grad_norm": 0.00037072168197482824,
      "learning_rate": 2.762657301726661e-05,
      "loss": 0.0,
      "step": 3058
    },
    {
      "epoch": 0.8952297336845186,
      "grad_norm": 0.0011817470658570528,
      "learning_rate": 2.761925665788704e-05,
      "loss": 0.0,
      "step": 3059
    },
    {
      "epoch": 0.8955223880597015,
      "grad_norm": 0.0008959379629231989,
      "learning_rate": 2.7611940298507467e-05,
      "loss": 0.0,
      "step": 3060
    },
    {
      "epoch": 0.8958150424348844,
      "grad_norm": 0.00038279814179986715,
      "learning_rate": 2.7604623939127895e-05,
      "loss": 0.0,
      "step": 3061
    },
    {
      "epoch": 0.8961076968100673,
      "grad_norm": 1.5234986543655396,
      "learning_rate": 2.7597307579748316e-05,
      "loss": 0.0032,
      "step": 3062
    },
    {
      "epoch": 0.8964003511852502,
      "grad_norm": 0.0007021187921054661,
      "learning_rate": 2.7589991220368744e-05,
      "loss": 0.0,
      "step": 3063
    },
    {
      "epoch": 0.8966930055604331,
      "grad_norm": 0.0006102448678575456,
      "learning_rate": 2.7582674860989172e-05,
      "loss": 0.0,
      "step": 3064
    },
    {
      "epoch": 0.8969856599356161,
      "grad_norm": 8.276649475097656,
      "learning_rate": 2.75753585016096e-05,
      "loss": 0.04,
      "step": 3065
    },
    {
      "epoch": 0.897278314310799,
      "grad_norm": 11.039922714233398,
      "learning_rate": 2.7568042142230028e-05,
      "loss": 0.0428,
      "step": 3066
    },
    {
      "epoch": 0.8975709686859819,
      "grad_norm": 0.00047941674711182714,
      "learning_rate": 2.7560725782850456e-05,
      "loss": 0.0,
      "step": 3067
    },
    {
      "epoch": 0.8978636230611647,
      "grad_norm": 0.0007145427516661584,
      "learning_rate": 2.7553409423470884e-05,
      "loss": 0.0,
      "step": 3068
    },
    {
      "epoch": 0.8981562774363476,
      "grad_norm": 0.002789260121062398,
      "learning_rate": 2.754609306409131e-05,
      "loss": 0.0,
      "step": 3069
    },
    {
      "epoch": 0.8984489318115306,
      "grad_norm": 0.000519847497344017,
      "learning_rate": 2.753877670471174e-05,
      "loss": 0.0,
      "step": 3070
    },
    {
      "epoch": 0.8987415861867135,
      "grad_norm": 0.001295949099585414,
      "learning_rate": 2.7531460345332168e-05,
      "loss": 0.0,
      "step": 3071
    },
    {
      "epoch": 0.8990342405618964,
      "grad_norm": 0.0006953420233912766,
      "learning_rate": 2.7524143985952596e-05,
      "loss": 0.0,
      "step": 3072
    },
    {
      "epoch": 0.8993268949370793,
      "grad_norm": 0.001858212286606431,
      "learning_rate": 2.7516827626573017e-05,
      "loss": 0.0,
      "step": 3073
    },
    {
      "epoch": 0.8996195493122622,
      "grad_norm": 0.007349081337451935,
      "learning_rate": 2.7509511267193445e-05,
      "loss": 0.0001,
      "step": 3074
    },
    {
      "epoch": 0.8999122036874452,
      "grad_norm": 0.0004943243111483753,
      "learning_rate": 2.7502194907813873e-05,
      "loss": 0.0,
      "step": 3075
    },
    {
      "epoch": 0.9002048580626281,
      "grad_norm": 0.0209695715457201,
      "learning_rate": 2.74948785484343e-05,
      "loss": 0.0002,
      "step": 3076
    },
    {
      "epoch": 0.900497512437811,
      "grad_norm": 0.00024704058887436986,
      "learning_rate": 2.748756218905473e-05,
      "loss": 0.0,
      "step": 3077
    },
    {
      "epoch": 0.9007901668129938,
      "grad_norm": 0.0019234855426475406,
      "learning_rate": 2.7480245829675156e-05,
      "loss": 0.0,
      "step": 3078
    },
    {
      "epoch": 0.9010828211881767,
      "grad_norm": 0.0011848368449136615,
      "learning_rate": 2.7472929470295584e-05,
      "loss": 0.0,
      "step": 3079
    },
    {
      "epoch": 0.9013754755633596,
      "grad_norm": 0.006680505815893412,
      "learning_rate": 2.7465613110916012e-05,
      "loss": 0.0001,
      "step": 3080
    },
    {
      "epoch": 0.9016681299385426,
      "grad_norm": 0.006065751425921917,
      "learning_rate": 2.745829675153644e-05,
      "loss": 0.0001,
      "step": 3081
    },
    {
      "epoch": 0.9019607843137255,
      "grad_norm": 0.0021666118409484625,
      "learning_rate": 2.7450980392156865e-05,
      "loss": 0.0,
      "step": 3082
    },
    {
      "epoch": 0.9022534386889084,
      "grad_norm": 0.0295471902936697,
      "learning_rate": 2.744366403277729e-05,
      "loss": 0.0001,
      "step": 3083
    },
    {
      "epoch": 0.9025460930640913,
      "grad_norm": 0.0008403001120314002,
      "learning_rate": 2.7436347673397717e-05,
      "loss": 0.0,
      "step": 3084
    },
    {
      "epoch": 0.9028387474392742,
      "grad_norm": 0.0363955982029438,
      "learning_rate": 2.7429031314018145e-05,
      "loss": 0.0002,
      "step": 3085
    },
    {
      "epoch": 0.9031314018144572,
      "grad_norm": 0.003731783712282777,
      "learning_rate": 2.7421714954638573e-05,
      "loss": 0.0,
      "step": 3086
    },
    {
      "epoch": 0.9034240561896401,
      "grad_norm": 0.0009479248546995223,
      "learning_rate": 2.7414398595259e-05,
      "loss": 0.0,
      "step": 3087
    },
    {
      "epoch": 0.903716710564823,
      "grad_norm": 0.08302231878042221,
      "learning_rate": 2.740708223587943e-05,
      "loss": 0.0004,
      "step": 3088
    },
    {
      "epoch": 0.9040093649400058,
      "grad_norm": 0.012477376498281956,
      "learning_rate": 2.7399765876499857e-05,
      "loss": 0.0001,
      "step": 3089
    },
    {
      "epoch": 0.9043020193151887,
      "grad_norm": 0.004335212055593729,
      "learning_rate": 2.739244951712028e-05,
      "loss": 0.0,
      "step": 3090
    },
    {
      "epoch": 0.9045946736903717,
      "grad_norm": 0.0004597943334374577,
      "learning_rate": 2.738513315774071e-05,
      "loss": 0.0,
      "step": 3091
    },
    {
      "epoch": 0.9048873280655546,
      "grad_norm": 0.0008222962496802211,
      "learning_rate": 2.7377816798361138e-05,
      "loss": 0.0,
      "step": 3092
    },
    {
      "epoch": 0.9051799824407375,
      "grad_norm": 0.024342892691493034,
      "learning_rate": 2.7370500438981566e-05,
      "loss": 0.0002,
      "step": 3093
    },
    {
      "epoch": 0.9054726368159204,
      "grad_norm": 0.0015932725509628654,
      "learning_rate": 2.736318407960199e-05,
      "loss": 0.0,
      "step": 3094
    },
    {
      "epoch": 0.9057652911911033,
      "grad_norm": 0.01010272279381752,
      "learning_rate": 2.7355867720222418e-05,
      "loss": 0.0001,
      "step": 3095
    },
    {
      "epoch": 0.9060579455662863,
      "grad_norm": 0.00036504954914562404,
      "learning_rate": 2.7348551360842846e-05,
      "loss": 0.0,
      "step": 3096
    },
    {
      "epoch": 0.9063505999414692,
      "grad_norm": 0.0004988229484297335,
      "learning_rate": 2.7341235001463274e-05,
      "loss": 0.0,
      "step": 3097
    },
    {
      "epoch": 0.906643254316652,
      "grad_norm": 0.040057141333818436,
      "learning_rate": 2.73339186420837e-05,
      "loss": 0.0002,
      "step": 3098
    },
    {
      "epoch": 0.9069359086918349,
      "grad_norm": 0.00041190991760231555,
      "learning_rate": 2.7326602282704126e-05,
      "loss": 0.0,
      "step": 3099
    },
    {
      "epoch": 0.9072285630670178,
      "grad_norm": 0.0003888402134180069,
      "learning_rate": 2.7319285923324554e-05,
      "loss": 0.0,
      "step": 3100
    },
    {
      "epoch": 0.9075212174422007,
      "grad_norm": 0.0060388194397091866,
      "learning_rate": 2.7311969563944982e-05,
      "loss": 0.0,
      "step": 3101
    },
    {
      "epoch": 0.9078138718173837,
      "grad_norm": 0.00023331062402576208,
      "learning_rate": 2.730465320456541e-05,
      "loss": 0.0,
      "step": 3102
    },
    {
      "epoch": 0.9081065261925666,
      "grad_norm": 0.003220916260033846,
      "learning_rate": 2.7297336845185838e-05,
      "loss": 0.0,
      "step": 3103
    },
    {
      "epoch": 0.9083991805677495,
      "grad_norm": 0.00027145931380800903,
      "learning_rate": 2.7290020485806263e-05,
      "loss": 0.0,
      "step": 3104
    },
    {
      "epoch": 0.9086918349429324,
      "grad_norm": 0.0002177278947783634,
      "learning_rate": 2.728270412642669e-05,
      "loss": 0.0,
      "step": 3105
    },
    {
      "epoch": 0.9089844893181153,
      "grad_norm": 0.0003419017593842,
      "learning_rate": 2.7275387767047115e-05,
      "loss": 0.0,
      "step": 3106
    },
    {
      "epoch": 0.9092771436932983,
      "grad_norm": 0.00884864293038845,
      "learning_rate": 2.7268071407667543e-05,
      "loss": 0.0001,
      "step": 3107
    },
    {
      "epoch": 0.9095697980684812,
      "grad_norm": 0.009763057343661785,
      "learning_rate": 2.726075504828797e-05,
      "loss": 0.0001,
      "step": 3108
    },
    {
      "epoch": 0.909862452443664,
      "grad_norm": 0.0008707040688022971,
      "learning_rate": 2.72534386889084e-05,
      "loss": 0.0,
      "step": 3109
    },
    {
      "epoch": 0.9101551068188469,
      "grad_norm": 0.0002548248157836497,
      "learning_rate": 2.7246122329528827e-05,
      "loss": 0.0,
      "step": 3110
    },
    {
      "epoch": 0.9104477611940298,
      "grad_norm": 0.00037588385748676956,
      "learning_rate": 2.7238805970149255e-05,
      "loss": 0.0,
      "step": 3111
    },
    {
      "epoch": 0.9107404155692128,
      "grad_norm": 0.0010243757860735059,
      "learning_rate": 2.7231489610769683e-05,
      "loss": 0.0,
      "step": 3112
    },
    {
      "epoch": 0.9110330699443957,
      "grad_norm": 0.0007129418663680553,
      "learning_rate": 2.722417325139011e-05,
      "loss": 0.0,
      "step": 3113
    },
    {
      "epoch": 0.9113257243195786,
      "grad_norm": 0.000473760039312765,
      "learning_rate": 2.721685689201054e-05,
      "loss": 0.0,
      "step": 3114
    },
    {
      "epoch": 0.9116183786947615,
      "grad_norm": 0.00029461312806233764,
      "learning_rate": 2.720954053263096e-05,
      "loss": 0.0,
      "step": 3115
    },
    {
      "epoch": 0.9119110330699444,
      "grad_norm": 0.0006559625035151839,
      "learning_rate": 2.7202224173251388e-05,
      "loss": 0.0,
      "step": 3116
    },
    {
      "epoch": 0.9122036874451273,
      "grad_norm": 0.05397697910666466,
      "learning_rate": 2.7194907813871816e-05,
      "loss": 0.0003,
      "step": 3117
    },
    {
      "epoch": 0.9124963418203103,
      "grad_norm": 0.0002400352677796036,
      "learning_rate": 2.7187591454492244e-05,
      "loss": 0.0,
      "step": 3118
    },
    {
      "epoch": 0.9127889961954931,
      "grad_norm": 0.37905460596084595,
      "learning_rate": 2.7180275095112672e-05,
      "loss": 0.0008,
      "step": 3119
    },
    {
      "epoch": 0.913081650570676,
      "grad_norm": 0.0003496919816825539,
      "learning_rate": 2.71729587357331e-05,
      "loss": 0.0,
      "step": 3120
    },
    {
      "epoch": 0.9133743049458589,
      "grad_norm": 0.000749769329559058,
      "learning_rate": 2.7165642376353528e-05,
      "loss": 0.0,
      "step": 3121
    },
    {
      "epoch": 0.9136669593210418,
      "grad_norm": 0.005695801693946123,
      "learning_rate": 2.7158326016973956e-05,
      "loss": 0.0,
      "step": 3122
    },
    {
      "epoch": 0.9139596136962248,
      "grad_norm": 0.00032327789813280106,
      "learning_rate": 2.7151009657594384e-05,
      "loss": 0.0,
      "step": 3123
    },
    {
      "epoch": 0.9142522680714077,
      "grad_norm": 0.00010946913971565664,
      "learning_rate": 2.714369329821481e-05,
      "loss": 0.0,
      "step": 3124
    },
    {
      "epoch": 0.9145449224465906,
      "grad_norm": 0.000152256601722911,
      "learning_rate": 2.713637693883524e-05,
      "loss": 0.0,
      "step": 3125
    },
    {
      "epoch": 0.9148375768217735,
      "grad_norm": 0.0009119808673858643,
      "learning_rate": 2.712906057945566e-05,
      "loss": 0.0,
      "step": 3126
    },
    {
      "epoch": 0.9151302311969564,
      "grad_norm": 0.00021451925567816943,
      "learning_rate": 2.712174422007609e-05,
      "loss": 0.0,
      "step": 3127
    },
    {
      "epoch": 0.9154228855721394,
      "grad_norm": 0.005684440489858389,
      "learning_rate": 2.7114427860696517e-05,
      "loss": 0.0,
      "step": 3128
    },
    {
      "epoch": 0.9157155399473222,
      "grad_norm": 0.0006643623928539455,
      "learning_rate": 2.7107111501316945e-05,
      "loss": 0.0,
      "step": 3129
    },
    {
      "epoch": 0.9160081943225051,
      "grad_norm": 0.00016802526079118252,
      "learning_rate": 2.7099795141937373e-05,
      "loss": 0.0,
      "step": 3130
    },
    {
      "epoch": 0.916300848697688,
      "grad_norm": 7.920035362243652,
      "learning_rate": 2.70924787825578e-05,
      "loss": 0.0216,
      "step": 3131
    },
    {
      "epoch": 0.9165935030728709,
      "grad_norm": 0.00022262395941652358,
      "learning_rate": 2.708516242317823e-05,
      "loss": 0.0,
      "step": 3132
    },
    {
      "epoch": 0.9168861574480538,
      "grad_norm": 0.00024071757798083127,
      "learning_rate": 2.7077846063798656e-05,
      "loss": 0.0,
      "step": 3133
    },
    {
      "epoch": 0.9171788118232368,
      "grad_norm": 0.00023998609685804695,
      "learning_rate": 2.7070529704419084e-05,
      "loss": 0.0,
      "step": 3134
    },
    {
      "epoch": 0.9174714661984197,
      "grad_norm": 0.0003449968062341213,
      "learning_rate": 2.7063213345039512e-05,
      "loss": 0.0,
      "step": 3135
    },
    {
      "epoch": 0.9177641205736026,
      "grad_norm": 0.0002522218564990908,
      "learning_rate": 2.7055896985659934e-05,
      "loss": 0.0,
      "step": 3136
    },
    {
      "epoch": 0.9180567749487855,
      "grad_norm": 0.0006752046756446362,
      "learning_rate": 2.704858062628036e-05,
      "loss": 0.0,
      "step": 3137
    },
    {
      "epoch": 0.9183494293239683,
      "grad_norm": 0.0299922376871109,
      "learning_rate": 2.704126426690079e-05,
      "loss": 0.0001,
      "step": 3138
    },
    {
      "epoch": 0.9186420836991513,
      "grad_norm": 0.01105990819633007,
      "learning_rate": 2.7033947907521217e-05,
      "loss": 0.0001,
      "step": 3139
    },
    {
      "epoch": 0.9189347380743342,
      "grad_norm": 0.15487124025821686,
      "learning_rate": 2.7026631548141645e-05,
      "loss": 0.0004,
      "step": 3140
    },
    {
      "epoch": 0.9192273924495171,
      "grad_norm": 0.15040268003940582,
      "learning_rate": 2.7019315188762073e-05,
      "loss": 0.0004,
      "step": 3141
    },
    {
      "epoch": 0.9195200468247,
      "grad_norm": 0.0076209064573049545,
      "learning_rate": 2.70119988293825e-05,
      "loss": 0.0,
      "step": 3142
    },
    {
      "epoch": 0.9198127011998829,
      "grad_norm": 0.03322245180606842,
      "learning_rate": 2.700468247000293e-05,
      "loss": 0.0002,
      "step": 3143
    },
    {
      "epoch": 0.9201053555750659,
      "grad_norm": 0.0031604596879333258,
      "learning_rate": 2.6997366110623357e-05,
      "loss": 0.0,
      "step": 3144
    },
    {
      "epoch": 0.9203980099502488,
      "grad_norm": 0.00011157146946061403,
      "learning_rate": 2.6990049751243785e-05,
      "loss": 0.0,
      "step": 3145
    },
    {
      "epoch": 0.9206906643254317,
      "grad_norm": 0.00012219483323860914,
      "learning_rate": 2.6982733391864213e-05,
      "loss": 0.0,
      "step": 3146
    },
    {
      "epoch": 0.9209833187006146,
      "grad_norm": 0.9359664916992188,
      "learning_rate": 2.6975417032484634e-05,
      "loss": 0.0021,
      "step": 3147
    },
    {
      "epoch": 0.9212759730757974,
      "grad_norm": 0.0023510432802140713,
      "learning_rate": 2.6968100673105062e-05,
      "loss": 0.0,
      "step": 3148
    },
    {
      "epoch": 0.9215686274509803,
      "grad_norm": 0.00047637286479584873,
      "learning_rate": 2.696078431372549e-05,
      "loss": 0.0,
      "step": 3149
    },
    {
      "epoch": 0.9218612818261633,
      "grad_norm": 0.00031984152155928314,
      "learning_rate": 2.6953467954345918e-05,
      "loss": 0.0,
      "step": 3150
    },
    {
      "epoch": 0.9221539362013462,
      "grad_norm": 0.00027832211344502866,
      "learning_rate": 2.6946151594966346e-05,
      "loss": 0.0,
      "step": 3151
    },
    {
      "epoch": 0.9224465905765291,
      "grad_norm": 0.00016787606000434607,
      "learning_rate": 2.6938835235586774e-05,
      "loss": 0.0,
      "step": 3152
    },
    {
      "epoch": 0.922739244951712,
      "grad_norm": 0.0006489913794212043,
      "learning_rate": 2.6931518876207202e-05,
      "loss": 0.0,
      "step": 3153
    },
    {
      "epoch": 0.9230318993268949,
      "grad_norm": 8.885025454219431e-05,
      "learning_rate": 2.692420251682763e-05,
      "loss": 0.0,
      "step": 3154
    },
    {
      "epoch": 0.9233245537020779,
      "grad_norm": 8.944397268351167e-05,
      "learning_rate": 2.6916886157448058e-05,
      "loss": 0.0,
      "step": 3155
    },
    {
      "epoch": 0.9236172080772608,
      "grad_norm": 8.712815179023892e-05,
      "learning_rate": 2.6909569798068486e-05,
      "loss": 0.0,
      "step": 3156
    },
    {
      "epoch": 0.9239098624524437,
      "grad_norm": 0.0003146608651150018,
      "learning_rate": 2.6902253438688914e-05,
      "loss": 0.0,
      "step": 3157
    },
    {
      "epoch": 0.9242025168276266,
      "grad_norm": 0.00010711107461247593,
      "learning_rate": 2.6894937079309335e-05,
      "loss": 0.0,
      "step": 3158
    },
    {
      "epoch": 0.9244951712028094,
      "grad_norm": 0.00014019818627275527,
      "learning_rate": 2.6887620719929763e-05,
      "loss": 0.0,
      "step": 3159
    },
    {
      "epoch": 0.9247878255779924,
      "grad_norm": 0.00018984345661010593,
      "learning_rate": 2.688030436055019e-05,
      "loss": 0.0,
      "step": 3160
    },
    {
      "epoch": 0.9250804799531753,
      "grad_norm": 0.00010449805267853662,
      "learning_rate": 2.687298800117062e-05,
      "loss": 0.0,
      "step": 3161
    },
    {
      "epoch": 0.9253731343283582,
      "grad_norm": 0.05337434262037277,
      "learning_rate": 2.6865671641791047e-05,
      "loss": 0.0002,
      "step": 3162
    },
    {
      "epoch": 0.9256657887035411,
      "grad_norm": 13.599655151367188,
      "learning_rate": 2.6858355282411475e-05,
      "loss": 0.0578,
      "step": 3163
    },
    {
      "epoch": 0.925958443078724,
      "grad_norm": 0.00011503922723932192,
      "learning_rate": 2.6851038923031903e-05,
      "loss": 0.0,
      "step": 3164
    },
    {
      "epoch": 0.926251097453907,
      "grad_norm": 0.00014844737597741187,
      "learning_rate": 2.684372256365233e-05,
      "loss": 0.0,
      "step": 3165
    },
    {
      "epoch": 0.9265437518290899,
      "grad_norm": 0.00011671489482978359,
      "learning_rate": 2.683640620427276e-05,
      "loss": 0.0,
      "step": 3166
    },
    {
      "epoch": 0.9268364062042728,
      "grad_norm": 8.918878302210942e-05,
      "learning_rate": 2.6829089844893186e-05,
      "loss": 0.0,
      "step": 3167
    },
    {
      "epoch": 0.9271290605794557,
      "grad_norm": 0.00026356399757787585,
      "learning_rate": 2.6821773485513608e-05,
      "loss": 0.0,
      "step": 3168
    },
    {
      "epoch": 0.9274217149546385,
      "grad_norm": 0.00011778157204389572,
      "learning_rate": 2.6814457126134036e-05,
      "loss": 0.0,
      "step": 3169
    },
    {
      "epoch": 0.9277143693298214,
      "grad_norm": 0.00011025131971109658,
      "learning_rate": 2.6807140766754463e-05,
      "loss": 0.0,
      "step": 3170
    },
    {
      "epoch": 0.9280070237050044,
      "grad_norm": 0.0004025105736218393,
      "learning_rate": 2.679982440737489e-05,
      "loss": 0.0,
      "step": 3171
    },
    {
      "epoch": 0.9282996780801873,
      "grad_norm": 0.007656876929104328,
      "learning_rate": 2.679250804799532e-05,
      "loss": 0.0,
      "step": 3172
    },
    {
      "epoch": 0.9285923324553702,
      "grad_norm": 0.0003975860890932381,
      "learning_rate": 2.6785191688615747e-05,
      "loss": 0.0,
      "step": 3173
    },
    {
      "epoch": 0.9288849868305531,
      "grad_norm": 0.001018122653476894,
      "learning_rate": 2.6777875329236175e-05,
      "loss": 0.0,
      "step": 3174
    },
    {
      "epoch": 0.929177641205736,
      "grad_norm": 0.0026253890246152878,
      "learning_rate": 2.6770558969856603e-05,
      "loss": 0.0,
      "step": 3175
    },
    {
      "epoch": 0.929470295580919,
      "grad_norm": 0.00011285820073680952,
      "learning_rate": 2.676324261047703e-05,
      "loss": 0.0,
      "step": 3176
    },
    {
      "epoch": 0.9297629499561019,
      "grad_norm": 0.006903521716594696,
      "learning_rate": 2.6755926251097456e-05,
      "loss": 0.0,
      "step": 3177
    },
    {
      "epoch": 0.9300556043312848,
      "grad_norm": 0.0006428345805034041,
      "learning_rate": 2.6748609891717884e-05,
      "loss": 0.0,
      "step": 3178
    },
    {
      "epoch": 0.9303482587064676,
      "grad_norm": 0.00021251077123451978,
      "learning_rate": 2.6741293532338308e-05,
      "loss": 0.0,
      "step": 3179
    },
    {
      "epoch": 0.9306409130816505,
      "grad_norm": 0.0028959973715245724,
      "learning_rate": 2.6733977172958736e-05,
      "loss": 0.0,
      "step": 3180
    },
    {
      "epoch": 0.9309335674568335,
      "grad_norm": 25.46250343322754,
      "learning_rate": 2.6726660813579164e-05,
      "loss": 0.0592,
      "step": 3181
    },
    {
      "epoch": 0.9312262218320164,
      "grad_norm": 0.9013259410858154,
      "learning_rate": 2.6719344454199592e-05,
      "loss": 0.0015,
      "step": 3182
    },
    {
      "epoch": 0.9315188762071993,
      "grad_norm": 0.00012032218364765868,
      "learning_rate": 2.671202809482002e-05,
      "loss": 0.0,
      "step": 3183
    },
    {
      "epoch": 0.9318115305823822,
      "grad_norm": 0.061714570969343185,
      "learning_rate": 2.6704711735440448e-05,
      "loss": 0.0003,
      "step": 3184
    },
    {
      "epoch": 0.9321041849575651,
      "grad_norm": 0.003692185739055276,
      "learning_rate": 2.6697395376060873e-05,
      "loss": 0.0,
      "step": 3185
    },
    {
      "epoch": 0.932396839332748,
      "grad_norm": 8.049399375915527,
      "learning_rate": 2.66900790166813e-05,
      "loss": 0.0124,
      "step": 3186
    },
    {
      "epoch": 0.932689493707931,
      "grad_norm": 7.663945143576711e-05,
      "learning_rate": 2.668276265730173e-05,
      "loss": 0.0,
      "step": 3187
    },
    {
      "epoch": 0.9329821480831139,
      "grad_norm": 9.304416744271293e-05,
      "learning_rate": 2.6675446297922156e-05,
      "loss": 0.0,
      "step": 3188
    },
    {
      "epoch": 0.9332748024582967,
      "grad_norm": 3.968400415033102e-05,
      "learning_rate": 2.666812993854258e-05,
      "loss": 0.0,
      "step": 3189
    },
    {
      "epoch": 0.9335674568334796,
      "grad_norm": 0.018485285341739655,
      "learning_rate": 2.666081357916301e-05,
      "loss": 0.0001,
      "step": 3190
    },
    {
      "epoch": 0.9338601112086625,
      "grad_norm": 0.000890921161044389,
      "learning_rate": 2.6653497219783437e-05,
      "loss": 0.0,
      "step": 3191
    },
    {
      "epoch": 0.9341527655838455,
      "grad_norm": 0.00018055552209261805,
      "learning_rate": 2.6646180860403865e-05,
      "loss": 0.0,
      "step": 3192
    },
    {
      "epoch": 0.9344454199590284,
      "grad_norm": 0.0004028046387247741,
      "learning_rate": 2.663886450102429e-05,
      "loss": 0.0,
      "step": 3193
    },
    {
      "epoch": 0.9347380743342113,
      "grad_norm": 0.021287068724632263,
      "learning_rate": 2.6631548141644717e-05,
      "loss": 0.0001,
      "step": 3194
    },
    {
      "epoch": 0.9350307287093942,
      "grad_norm": 21.67721939086914,
      "learning_rate": 2.6624231782265145e-05,
      "loss": 0.0499,
      "step": 3195
    },
    {
      "epoch": 0.9353233830845771,
      "grad_norm": 0.0007531607989221811,
      "learning_rate": 2.6616915422885573e-05,
      "loss": 0.0,
      "step": 3196
    },
    {
      "epoch": 0.9356160374597601,
      "grad_norm": 0.00021644483786076307,
      "learning_rate": 2.6609599063506e-05,
      "loss": 0.0,
      "step": 3197
    },
    {
      "epoch": 0.935908691834943,
      "grad_norm": 0.002615989651530981,
      "learning_rate": 2.660228270412643e-05,
      "loss": 0.0,
      "step": 3198
    },
    {
      "epoch": 0.9362013462101259,
      "grad_norm": 0.00013541265798266977,
      "learning_rate": 2.6594966344746857e-05,
      "loss": 0.0,
      "step": 3199
    },
    {
      "epoch": 0.9364940005853087,
      "grad_norm": 0.00019994725880678743,
      "learning_rate": 2.658764998536728e-05,
      "loss": 0.0,
      "step": 3200
    },
    {
      "epoch": 0.9367866549604916,
      "grad_norm": 0.001617005211301148,
      "learning_rate": 2.6580333625987706e-05,
      "loss": 0.0,
      "step": 3201
    },
    {
      "epoch": 0.9370793093356745,
      "grad_norm": 0.0008476759539917111,
      "learning_rate": 2.6573017266608134e-05,
      "loss": 0.0,
      "step": 3202
    },
    {
      "epoch": 0.9373719637108575,
      "grad_norm": 0.0002651185786817223,
      "learning_rate": 2.6565700907228562e-05,
      "loss": 0.0,
      "step": 3203
    },
    {
      "epoch": 0.9376646180860404,
      "grad_norm": 0.0006702494574710727,
      "learning_rate": 2.655838454784899e-05,
      "loss": 0.0,
      "step": 3204
    },
    {
      "epoch": 0.9379572724612233,
      "grad_norm": 0.0005622314638458192,
      "learning_rate": 2.6551068188469418e-05,
      "loss": 0.0,
      "step": 3205
    },
    {
      "epoch": 0.9382499268364062,
      "grad_norm": 9.157440185546875,
      "learning_rate": 2.6543751829089846e-05,
      "loss": 0.0189,
      "step": 3206
    },
    {
      "epoch": 0.9385425812115891,
      "grad_norm": 0.024700021371245384,
      "learning_rate": 2.6536435469710274e-05,
      "loss": 0.0004,
      "step": 3207
    },
    {
      "epoch": 0.9388352355867721,
      "grad_norm": 21.003908157348633,
      "learning_rate": 2.6529119110330702e-05,
      "loss": 0.3449,
      "step": 3208
    },
    {
      "epoch": 0.939127889961955,
      "grad_norm": 0.0007336666458286345,
      "learning_rate": 2.652180275095113e-05,
      "loss": 0.0,
      "step": 3209
    },
    {
      "epoch": 0.9394205443371378,
      "grad_norm": 0.0008810044964775443,
      "learning_rate": 2.6514486391571558e-05,
      "loss": 0.0,
      "step": 3210
    },
    {
      "epoch": 0.9397131987123207,
      "grad_norm": 0.0016570580191910267,
      "learning_rate": 2.650717003219198e-05,
      "loss": 0.0,
      "step": 3211
    },
    {
      "epoch": 0.9400058530875036,
      "grad_norm": 0.00028649967862293124,
      "learning_rate": 2.6499853672812407e-05,
      "loss": 0.0,
      "step": 3212
    },
    {
      "epoch": 0.9402985074626866,
      "grad_norm": 0.0003914633998647332,
      "learning_rate": 2.6492537313432835e-05,
      "loss": 0.0,
      "step": 3213
    },
    {
      "epoch": 0.9405911618378695,
      "grad_norm": 0.000271236669505015,
      "learning_rate": 2.6485220954053263e-05,
      "loss": 0.0,
      "step": 3214
    },
    {
      "epoch": 0.9408838162130524,
      "grad_norm": 0.0004488322592806071,
      "learning_rate": 2.647790459467369e-05,
      "loss": 0.0,
      "step": 3215
    },
    {
      "epoch": 0.9411764705882353,
      "grad_norm": 1.9253593683242798,
      "learning_rate": 2.647058823529412e-05,
      "loss": 0.0028,
      "step": 3216
    },
    {
      "epoch": 0.9414691249634182,
      "grad_norm": 0.061426468193531036,
      "learning_rate": 2.6463271875914547e-05,
      "loss": 0.0002,
      "step": 3217
    },
    {
      "epoch": 0.9417617793386011,
      "grad_norm": 9.975981712341309,
      "learning_rate": 2.6455955516534975e-05,
      "loss": 0.0201,
      "step": 3218
    },
    {
      "epoch": 0.9420544337137841,
      "grad_norm": 0.055726345628499985,
      "learning_rate": 2.6448639157155403e-05,
      "loss": 0.0002,
      "step": 3219
    },
    {
      "epoch": 0.9423470880889669,
      "grad_norm": 22.72140121459961,
      "learning_rate": 2.644132279777583e-05,
      "loss": 0.0659,
      "step": 3220
    },
    {
      "epoch": 0.9426397424641498,
      "grad_norm": 0.001565615995787084,
      "learning_rate": 2.643400643839625e-05,
      "loss": 0.0,
      "step": 3221
    },
    {
      "epoch": 0.9429323968393327,
      "grad_norm": 0.0002118179836543277,
      "learning_rate": 2.642669007901668e-05,
      "loss": 0.0,
      "step": 3222
    },
    {
      "epoch": 0.9432250512145156,
      "grad_norm": 0.00017638143617659807,
      "learning_rate": 2.6419373719637108e-05,
      "loss": 0.0,
      "step": 3223
    },
    {
      "epoch": 0.9435177055896986,
      "grad_norm": 0.008058113977313042,
      "learning_rate": 2.6412057360257536e-05,
      "loss": 0.0,
      "step": 3224
    },
    {
      "epoch": 0.9438103599648815,
      "grad_norm": 0.00033659982727840543,
      "learning_rate": 2.6404741000877963e-05,
      "loss": 0.0,
      "step": 3225
    },
    {
      "epoch": 0.9441030143400644,
      "grad_norm": 0.0003911230305675417,
      "learning_rate": 2.639742464149839e-05,
      "loss": 0.0,
      "step": 3226
    },
    {
      "epoch": 0.9443956687152473,
      "grad_norm": 0.0328420028090477,
      "learning_rate": 2.639010828211882e-05,
      "loss": 0.0002,
      "step": 3227
    },
    {
      "epoch": 0.9446883230904302,
      "grad_norm": 14.501241683959961,
      "learning_rate": 2.6382791922739247e-05,
      "loss": 0.1735,
      "step": 3228
    },
    {
      "epoch": 0.9449809774656132,
      "grad_norm": 0.6613368391990662,
      "learning_rate": 2.6375475563359675e-05,
      "loss": 0.0013,
      "step": 3229
    },
    {
      "epoch": 0.945273631840796,
      "grad_norm": 9.179767608642578,
      "learning_rate": 2.6368159203980103e-05,
      "loss": 0.1605,
      "step": 3230
    },
    {
      "epoch": 0.9455662862159789,
      "grad_norm": 8.901062965393066,
      "learning_rate": 2.636084284460053e-05,
      "loss": 0.0386,
      "step": 3231
    },
    {
      "epoch": 0.9458589405911618,
      "grad_norm": 0.00024640775518491864,
      "learning_rate": 2.6353526485220952e-05,
      "loss": 0.0,
      "step": 3232
    },
    {
      "epoch": 0.9461515949663447,
      "grad_norm": 0.0004362701147329062,
      "learning_rate": 2.634621012584138e-05,
      "loss": 0.0,
      "step": 3233
    },
    {
      "epoch": 0.9464442493415277,
      "grad_norm": 0.0012233969755470753,
      "learning_rate": 2.6338893766461808e-05,
      "loss": 0.0,
      "step": 3234
    },
    {
      "epoch": 0.9467369037167106,
      "grad_norm": 0.0006996267475187778,
      "learning_rate": 2.6331577407082236e-05,
      "loss": 0.0,
      "step": 3235
    },
    {
      "epoch": 0.9470295580918935,
      "grad_norm": 0.020976858213543892,
      "learning_rate": 2.6324261047702664e-05,
      "loss": 0.0002,
      "step": 3236
    },
    {
      "epoch": 0.9473222124670764,
      "grad_norm": 0.00276585784740746,
      "learning_rate": 2.6316944688323092e-05,
      "loss": 0.0,
      "step": 3237
    },
    {
      "epoch": 0.9476148668422593,
      "grad_norm": 22.640687942504883,
      "learning_rate": 2.630962832894352e-05,
      "loss": 0.0774,
      "step": 3238
    },
    {
      "epoch": 0.9479075212174422,
      "grad_norm": 0.007990888319909573,
      "learning_rate": 2.6302311969563948e-05,
      "loss": 0.0001,
      "step": 3239
    },
    {
      "epoch": 0.9482001755926251,
      "grad_norm": 11.964275360107422,
      "learning_rate": 2.6294995610184376e-05,
      "loss": 0.1295,
      "step": 3240
    },
    {
      "epoch": 0.948492829967808,
      "grad_norm": 12.758728981018066,
      "learning_rate": 2.6287679250804804e-05,
      "loss": 0.1538,
      "step": 3241
    },
    {
      "epoch": 0.9487854843429909,
      "grad_norm": 0.0004913790035061538,
      "learning_rate": 2.6280362891425225e-05,
      "loss": 0.0,
      "step": 3242
    },
    {
      "epoch": 0.9490781387181738,
      "grad_norm": 0.0006277439533732831,
      "learning_rate": 2.6273046532045653e-05,
      "loss": 0.0,
      "step": 3243
    },
    {
      "epoch": 0.9493707930933567,
      "grad_norm": 0.0029538183007389307,
      "learning_rate": 2.626573017266608e-05,
      "loss": 0.0,
      "step": 3244
    },
    {
      "epoch": 0.9496634474685397,
      "grad_norm": 0.013482455164194107,
      "learning_rate": 2.625841381328651e-05,
      "loss": 0.0001,
      "step": 3245
    },
    {
      "epoch": 0.9499561018437226,
      "grad_norm": 0.001173302996903658,
      "learning_rate": 2.6251097453906937e-05,
      "loss": 0.0,
      "step": 3246
    },
    {
      "epoch": 0.9502487562189055,
      "grad_norm": 0.0009915552800521255,
      "learning_rate": 2.6243781094527365e-05,
      "loss": 0.0,
      "step": 3247
    },
    {
      "epoch": 0.9505414105940884,
      "grad_norm": 0.0009076668065972626,
      "learning_rate": 2.6236464735147793e-05,
      "loss": 0.0,
      "step": 3248
    },
    {
      "epoch": 0.9508340649692713,
      "grad_norm": 0.012135384604334831,
      "learning_rate": 2.622914837576822e-05,
      "loss": 0.0001,
      "step": 3249
    },
    {
      "epoch": 0.9511267193444543,
      "grad_norm": 0.00030453107319772243,
      "learning_rate": 2.622183201638865e-05,
      "loss": 0.0,
      "step": 3250
    },
    {
      "epoch": 0.9514193737196371,
      "grad_norm": 0.0006158557371236384,
      "learning_rate": 2.6214515657009077e-05,
      "loss": 0.0,
      "step": 3251
    },
    {
      "epoch": 0.95171202809482,
      "grad_norm": 0.0005525677115656435,
      "learning_rate": 2.6207199297629505e-05,
      "loss": 0.0,
      "step": 3252
    },
    {
      "epoch": 0.9520046824700029,
      "grad_norm": 0.011062745936214924,
      "learning_rate": 2.6199882938249926e-05,
      "loss": 0.0001,
      "step": 3253
    },
    {
      "epoch": 0.9522973368451858,
      "grad_norm": 3.0753278732299805,
      "learning_rate": 2.6192566578870354e-05,
      "loss": 0.0056,
      "step": 3254
    },
    {
      "epoch": 0.9525899912203687,
      "grad_norm": 0.00169378484133631,
      "learning_rate": 2.618525021949078e-05,
      "loss": 0.0,
      "step": 3255
    },
    {
      "epoch": 0.9528826455955517,
      "grad_norm": 2.393634557723999,
      "learning_rate": 2.617793386011121e-05,
      "loss": 0.0049,
      "step": 3256
    },
    {
      "epoch": 0.9531752999707346,
      "grad_norm": 0.0027157398872077465,
      "learning_rate": 2.6170617500731638e-05,
      "loss": 0.0,
      "step": 3257
    },
    {
      "epoch": 0.9534679543459175,
      "grad_norm": 0.0003597254981286824,
      "learning_rate": 2.6163301141352066e-05,
      "loss": 0.0,
      "step": 3258
    },
    {
      "epoch": 0.9537606087211004,
      "grad_norm": 0.0020881923846900463,
      "learning_rate": 2.6155984781972493e-05,
      "loss": 0.0,
      "step": 3259
    },
    {
      "epoch": 0.9540532630962832,
      "grad_norm": 0.0012122966581955552,
      "learning_rate": 2.614866842259292e-05,
      "loss": 0.0,
      "step": 3260
    },
    {
      "epoch": 0.9543459174714662,
      "grad_norm": 0.002034051576629281,
      "learning_rate": 2.614135206321335e-05,
      "loss": 0.0,
      "step": 3261
    },
    {
      "epoch": 0.9546385718466491,
      "grad_norm": 0.004289246164262295,
      "learning_rate": 2.6134035703833777e-05,
      "loss": 0.0001,
      "step": 3262
    },
    {
      "epoch": 0.954931226221832,
      "grad_norm": 9.942416363628581e-05,
      "learning_rate": 2.6126719344454202e-05,
      "loss": 0.0,
      "step": 3263
    },
    {
      "epoch": 0.9552238805970149,
      "grad_norm": 0.2390059232711792,
      "learning_rate": 2.6119402985074626e-05,
      "loss": 0.0007,
      "step": 3264
    },
    {
      "epoch": 0.9555165349721978,
      "grad_norm": 0.028897233307361603,
      "learning_rate": 2.6112086625695054e-05,
      "loss": 0.0002,
      "step": 3265
    },
    {
      "epoch": 0.9558091893473808,
      "grad_norm": 0.5268934369087219,
      "learning_rate": 2.6104770266315482e-05,
      "loss": 0.0018,
      "step": 3266
    },
    {
      "epoch": 0.9561018437225637,
      "grad_norm": 0.3045479655265808,
      "learning_rate": 2.609745390693591e-05,
      "loss": 0.0007,
      "step": 3267
    },
    {
      "epoch": 0.9563944980977466,
      "grad_norm": 0.0052806478925049305,
      "learning_rate": 2.6090137547556338e-05,
      "loss": 0.0001,
      "step": 3268
    },
    {
      "epoch": 0.9566871524729295,
      "grad_norm": 4.63300895690918,
      "learning_rate": 2.6082821188176766e-05,
      "loss": 0.2525,
      "step": 3269
    },
    {
      "epoch": 0.9569798068481123,
      "grad_norm": 0.0004651243216358125,
      "learning_rate": 2.6075504828797194e-05,
      "loss": 0.0,
      "step": 3270
    },
    {
      "epoch": 0.9572724612232952,
      "grad_norm": 0.023930110037326813,
      "learning_rate": 2.606818846941762e-05,
      "loss": 0.0001,
      "step": 3271
    },
    {
      "epoch": 0.9575651155984782,
      "grad_norm": 0.0017391935689374804,
      "learning_rate": 2.6060872110038047e-05,
      "loss": 0.0,
      "step": 3272
    },
    {
      "epoch": 0.9578577699736611,
      "grad_norm": 1.1058738231658936,
      "learning_rate": 2.6053555750658475e-05,
      "loss": 0.0033,
      "step": 3273
    },
    {
      "epoch": 0.958150424348844,
      "grad_norm": 0.006419755984097719,
      "learning_rate": 2.60462393912789e-05,
      "loss": 0.0001,
      "step": 3274
    },
    {
      "epoch": 0.9584430787240269,
      "grad_norm": 0.012653257697820663,
      "learning_rate": 2.6038923031899327e-05,
      "loss": 0.0001,
      "step": 3275
    },
    {
      "epoch": 0.9587357330992098,
      "grad_norm": 1.6761451959609985,
      "learning_rate": 2.6031606672519755e-05,
      "loss": 0.0049,
      "step": 3276
    },
    {
      "epoch": 0.9590283874743928,
      "grad_norm": 0.004024661611765623,
      "learning_rate": 2.6024290313140183e-05,
      "loss": 0.0,
      "step": 3277
    },
    {
      "epoch": 0.9593210418495757,
      "grad_norm": 0.018077418208122253,
      "learning_rate": 2.601697395376061e-05,
      "loss": 0.0001,
      "step": 3278
    },
    {
      "epoch": 0.9596136962247586,
      "grad_norm": 0.0010719620622694492,
      "learning_rate": 2.6009657594381036e-05,
      "loss": 0.0,
      "step": 3279
    },
    {
      "epoch": 0.9599063505999414,
      "grad_norm": 0.005239298567175865,
      "learning_rate": 2.6002341235001463e-05,
      "loss": 0.0001,
      "step": 3280
    },
    {
      "epoch": 0.9601990049751243,
      "grad_norm": 0.007799623534083366,
      "learning_rate": 2.599502487562189e-05,
      "loss": 0.0001,
      "step": 3281
    },
    {
      "epoch": 0.9604916593503073,
      "grad_norm": 0.018759027123451233,
      "learning_rate": 2.598770851624232e-05,
      "loss": 0.0002,
      "step": 3282
    },
    {
      "epoch": 0.9607843137254902,
      "grad_norm": 0.028783559799194336,
      "learning_rate": 2.5980392156862747e-05,
      "loss": 0.0002,
      "step": 3283
    },
    {
      "epoch": 0.9610769681006731,
      "grad_norm": 0.018134091049432755,
      "learning_rate": 2.5973075797483175e-05,
      "loss": 0.0001,
      "step": 3284
    },
    {
      "epoch": 0.961369622475856,
      "grad_norm": 0.0472426600754261,
      "learning_rate": 2.59657594381036e-05,
      "loss": 0.0003,
      "step": 3285
    },
    {
      "epoch": 0.9616622768510389,
      "grad_norm": 0.01522039994597435,
      "learning_rate": 2.5958443078724028e-05,
      "loss": 0.0001,
      "step": 3286
    },
    {
      "epoch": 0.9619549312262218,
      "grad_norm": 11.493000984191895,
      "learning_rate": 2.5951126719344452e-05,
      "loss": 0.108,
      "step": 3287
    },
    {
      "epoch": 0.9622475856014048,
      "grad_norm": 0.10344574600458145,
      "learning_rate": 2.594381035996488e-05,
      "loss": 0.0004,
      "step": 3288
    },
    {
      "epoch": 0.9625402399765877,
      "grad_norm": 1.291723370552063,
      "learning_rate": 2.5936494000585308e-05,
      "loss": 0.0027,
      "step": 3289
    },
    {
      "epoch": 0.9628328943517706,
      "grad_norm": 0.011618785560131073,
      "learning_rate": 2.5929177641205736e-05,
      "loss": 0.0001,
      "step": 3290
    },
    {
      "epoch": 0.9631255487269534,
      "grad_norm": 0.0016451601404696703,
      "learning_rate": 2.5921861281826164e-05,
      "loss": 0.0,
      "step": 3291
    },
    {
      "epoch": 0.9634182031021363,
      "grad_norm": 0.04313068464398384,
      "learning_rate": 2.5914544922446592e-05,
      "loss": 0.0003,
      "step": 3292
    },
    {
      "epoch": 0.9637108574773193,
      "grad_norm": 0.00029622757574543357,
      "learning_rate": 2.590722856306702e-05,
      "loss": 0.0,
      "step": 3293
    },
    {
      "epoch": 0.9640035118525022,
      "grad_norm": 0.00045671130646951497,
      "learning_rate": 2.5899912203687448e-05,
      "loss": 0.0,
      "step": 3294
    },
    {
      "epoch": 0.9642961662276851,
      "grad_norm": 0.002273781690746546,
      "learning_rate": 2.5892595844307873e-05,
      "loss": 0.0,
      "step": 3295
    },
    {
      "epoch": 0.964588820602868,
      "grad_norm": 0.0009717473876662552,
      "learning_rate": 2.5885279484928297e-05,
      "loss": 0.0,
      "step": 3296
    },
    {
      "epoch": 0.9648814749780509,
      "grad_norm": 0.0005301141645759344,
      "learning_rate": 2.5877963125548725e-05,
      "loss": 0.0,
      "step": 3297
    },
    {
      "epoch": 0.9651741293532339,
      "grad_norm": 0.0018403874710202217,
      "learning_rate": 2.5870646766169153e-05,
      "loss": 0.0,
      "step": 3298
    },
    {
      "epoch": 0.9654667837284168,
      "grad_norm": 0.0006049801595509052,
      "learning_rate": 2.586333040678958e-05,
      "loss": 0.0,
      "step": 3299
    },
    {
      "epoch": 0.9657594381035997,
      "grad_norm": 0.00038890886935405433,
      "learning_rate": 2.585601404741001e-05,
      "loss": 0.0,
      "step": 3300
    },
    {
      "epoch": 0.9660520924787825,
      "grad_norm": 0.0020230012014508247,
      "learning_rate": 2.5848697688030437e-05,
      "loss": 0.0,
      "step": 3301
    },
    {
      "epoch": 0.9663447468539654,
      "grad_norm": 0.07929252833127975,
      "learning_rate": 2.5841381328650865e-05,
      "loss": 0.0003,
      "step": 3302
    },
    {
      "epoch": 0.9666374012291484,
      "grad_norm": 0.0015152136329561472,
      "learning_rate": 2.5834064969271293e-05,
      "loss": 0.0,
      "step": 3303
    },
    {
      "epoch": 0.9669300556043313,
      "grad_norm": 0.13640671968460083,
      "learning_rate": 2.582674860989172e-05,
      "loss": 0.0005,
      "step": 3304
    },
    {
      "epoch": 0.9672227099795142,
      "grad_norm": 0.0044171796180307865,
      "learning_rate": 2.581943225051215e-05,
      "loss": 0.0001,
      "step": 3305
    },
    {
      "epoch": 0.9675153643546971,
      "grad_norm": 3.793172597885132,
      "learning_rate": 2.581211589113257e-05,
      "loss": 0.0068,
      "step": 3306
    },
    {
      "epoch": 0.96780801872988,
      "grad_norm": 0.000829192460514605,
      "learning_rate": 2.5804799531752998e-05,
      "loss": 0.0,
      "step": 3307
    },
    {
      "epoch": 0.9681006731050629,
      "grad_norm": 0.018820637837052345,
      "learning_rate": 2.5797483172373426e-05,
      "loss": 0.0001,
      "step": 3308
    },
    {
      "epoch": 0.9683933274802459,
      "grad_norm": 0.0009690960869193077,
      "learning_rate": 2.5790166812993854e-05,
      "loss": 0.0,
      "step": 3309
    },
    {
      "epoch": 0.9686859818554288,
      "grad_norm": 0.2188994586467743,
      "learning_rate": 2.578285045361428e-05,
      "loss": 0.0009,
      "step": 3310
    },
    {
      "epoch": 0.9689786362306116,
      "grad_norm": 0.0027362063992768526,
      "learning_rate": 2.577553409423471e-05,
      "loss": 0.0,
      "step": 3311
    },
    {
      "epoch": 0.9692712906057945,
      "grad_norm": 0.0008724553044885397,
      "learning_rate": 2.5768217734855138e-05,
      "loss": 0.0,
      "step": 3312
    },
    {
      "epoch": 0.9695639449809774,
      "grad_norm": 0.009658461436629295,
      "learning_rate": 2.5760901375475566e-05,
      "loss": 0.0001,
      "step": 3313
    },
    {
      "epoch": 0.9698565993561604,
      "grad_norm": 0.00034940437762998044,
      "learning_rate": 2.5753585016095993e-05,
      "loss": 0.0,
      "step": 3314
    },
    {
      "epoch": 0.9701492537313433,
      "grad_norm": 0.0005051441839896142,
      "learning_rate": 2.574626865671642e-05,
      "loss": 0.0,
      "step": 3315
    },
    {
      "epoch": 0.9704419081065262,
      "grad_norm": 0.0007315681432373822,
      "learning_rate": 2.573895229733685e-05,
      "loss": 0.0,
      "step": 3316
    },
    {
      "epoch": 0.9707345624817091,
      "grad_norm": 0.0003318546514492482,
      "learning_rate": 2.573163593795727e-05,
      "loss": 0.0,
      "step": 3317
    },
    {
      "epoch": 0.971027216856892,
      "grad_norm": 0.0004913939046673477,
      "learning_rate": 2.57243195785777e-05,
      "loss": 0.0,
      "step": 3318
    },
    {
      "epoch": 0.971319871232075,
      "grad_norm": 0.00020777047029696405,
      "learning_rate": 2.5717003219198126e-05,
      "loss": 0.0,
      "step": 3319
    },
    {
      "epoch": 0.9716125256072579,
      "grad_norm": 0.0025297177489846945,
      "learning_rate": 2.5709686859818554e-05,
      "loss": 0.0,
      "step": 3320
    },
    {
      "epoch": 0.9719051799824407,
      "grad_norm": 0.013613739982247353,
      "learning_rate": 2.5702370500438982e-05,
      "loss": 0.0001,
      "step": 3321
    },
    {
      "epoch": 0.9721978343576236,
      "grad_norm": 0.0002099954435834661,
      "learning_rate": 2.569505414105941e-05,
      "loss": 0.0,
      "step": 3322
    },
    {
      "epoch": 0.9724904887328065,
      "grad_norm": 0.0019854209385812283,
      "learning_rate": 2.5687737781679838e-05,
      "loss": 0.0,
      "step": 3323
    },
    {
      "epoch": 0.9727831431079894,
      "grad_norm": 0.00042396143544465303,
      "learning_rate": 2.5680421422300266e-05,
      "loss": 0.0,
      "step": 3324
    },
    {
      "epoch": 0.9730757974831724,
      "grad_norm": 0.002329543698579073,
      "learning_rate": 2.5673105062920694e-05,
      "loss": 0.0,
      "step": 3325
    },
    {
      "epoch": 0.9733684518583553,
      "grad_norm": 0.0005398441571742296,
      "learning_rate": 2.5665788703541122e-05,
      "loss": 0.0,
      "step": 3326
    },
    {
      "epoch": 0.9736611062335382,
      "grad_norm": 0.0003080609312746674,
      "learning_rate": 2.5658472344161543e-05,
      "loss": 0.0,
      "step": 3327
    },
    {
      "epoch": 0.9739537606087211,
      "grad_norm": 0.00035853084409609437,
      "learning_rate": 2.565115598478197e-05,
      "loss": 0.0,
      "step": 3328
    },
    {
      "epoch": 0.974246414983904,
      "grad_norm": 0.0006352364434860647,
      "learning_rate": 2.56438396254024e-05,
      "loss": 0.0,
      "step": 3329
    },
    {
      "epoch": 0.974539069359087,
      "grad_norm": 0.2538038194179535,
      "learning_rate": 2.5636523266022827e-05,
      "loss": 0.0008,
      "step": 3330
    },
    {
      "epoch": 0.9748317237342699,
      "grad_norm": 0.0013529519783332944,
      "learning_rate": 2.5629206906643255e-05,
      "loss": 0.0,
      "step": 3331
    },
    {
      "epoch": 0.9751243781094527,
      "grad_norm": 0.0006466333288699389,
      "learning_rate": 2.5621890547263683e-05,
      "loss": 0.0,
      "step": 3332
    },
    {
      "epoch": 0.9754170324846356,
      "grad_norm": 0.0003267640422564,
      "learning_rate": 2.561457418788411e-05,
      "loss": 0.0,
      "step": 3333
    },
    {
      "epoch": 0.9757096868598185,
      "grad_norm": 0.00029956409707665443,
      "learning_rate": 2.560725782850454e-05,
      "loss": 0.0,
      "step": 3334
    },
    {
      "epoch": 0.9760023412350015,
      "grad_norm": 0.0002519940317142755,
      "learning_rate": 2.5599941469124967e-05,
      "loss": 0.0,
      "step": 3335
    },
    {
      "epoch": 0.9762949956101844,
      "grad_norm": 0.0004761736490763724,
      "learning_rate": 2.5592625109745395e-05,
      "loss": 0.0,
      "step": 3336
    },
    {
      "epoch": 0.9765876499853673,
      "grad_norm": 0.00024568208027631044,
      "learning_rate": 2.5585308750365823e-05,
      "loss": 0.0,
      "step": 3337
    },
    {
      "epoch": 0.9768803043605502,
      "grad_norm": 0.000524199684150517,
      "learning_rate": 2.5577992390986244e-05,
      "loss": 0.0,
      "step": 3338
    },
    {
      "epoch": 0.9771729587357331,
      "grad_norm": 0.0008691222174093127,
      "learning_rate": 2.5570676031606672e-05,
      "loss": 0.0,
      "step": 3339
    },
    {
      "epoch": 0.977465613110916,
      "grad_norm": 0.00055574846919626,
      "learning_rate": 2.55633596722271e-05,
      "loss": 0.0,
      "step": 3340
    },
    {
      "epoch": 0.977758267486099,
      "grad_norm": 0.0003735160280484706,
      "learning_rate": 2.5556043312847528e-05,
      "loss": 0.0,
      "step": 3341
    },
    {
      "epoch": 0.9780509218612818,
      "grad_norm": 0.0003160155611112714,
      "learning_rate": 2.5548726953467956e-05,
      "loss": 0.0,
      "step": 3342
    },
    {
      "epoch": 0.9783435762364647,
      "grad_norm": 0.03528832271695137,
      "learning_rate": 2.5541410594088384e-05,
      "loss": 0.0002,
      "step": 3343
    },
    {
      "epoch": 0.9786362306116476,
      "grad_norm": 0.0003516852739267051,
      "learning_rate": 2.553409423470881e-05,
      "loss": 0.0,
      "step": 3344
    },
    {
      "epoch": 0.9789288849868305,
      "grad_norm": 0.00026389810955151916,
      "learning_rate": 2.552677787532924e-05,
      "loss": 0.0,
      "step": 3345
    },
    {
      "epoch": 0.9792215393620135,
      "grad_norm": 0.0005689252284355462,
      "learning_rate": 2.5519461515949668e-05,
      "loss": 0.0,
      "step": 3346
    },
    {
      "epoch": 0.9795141937371964,
      "grad_norm": 0.00020987627794966102,
      "learning_rate": 2.5512145156570096e-05,
      "loss": 0.0,
      "step": 3347
    },
    {
      "epoch": 0.9798068481123793,
      "grad_norm": 0.0004520490183494985,
      "learning_rate": 2.5504828797190523e-05,
      "loss": 0.0,
      "step": 3348
    },
    {
      "epoch": 0.9800995024875622,
      "grad_norm": 0.00020526224398054183,
      "learning_rate": 2.5497512437810945e-05,
      "loss": 0.0,
      "step": 3349
    },
    {
      "epoch": 0.9803921568627451,
      "grad_norm": 0.0008702614577487111,
      "learning_rate": 2.5490196078431373e-05,
      "loss": 0.0,
      "step": 3350
    },
    {
      "epoch": 0.9806848112379281,
      "grad_norm": 0.00030936242546886206,
      "learning_rate": 2.54828797190518e-05,
      "loss": 0.0,
      "step": 3351
    },
    {
      "epoch": 0.9809774656131109,
      "grad_norm": 0.0038242791779339314,
      "learning_rate": 2.547556335967223e-05,
      "loss": 0.0,
      "step": 3352
    },
    {
      "epoch": 0.9812701199882938,
      "grad_norm": 0.0009497537394054234,
      "learning_rate": 2.5468247000292656e-05,
      "loss": 0.0,
      "step": 3353
    },
    {
      "epoch": 0.9815627743634767,
      "grad_norm": 0.0002302091015735641,
      "learning_rate": 2.5460930640913084e-05,
      "loss": 0.0,
      "step": 3354
    },
    {
      "epoch": 0.9818554287386596,
      "grad_norm": 0.009501870721578598,
      "learning_rate": 2.5453614281533512e-05,
      "loss": 0.0001,
      "step": 3355
    },
    {
      "epoch": 0.9821480831138426,
      "grad_norm": 0.0034651365131139755,
      "learning_rate": 2.544629792215394e-05,
      "loss": 0.0,
      "step": 3356
    },
    {
      "epoch": 0.9824407374890255,
      "grad_norm": 0.00045796119957230985,
      "learning_rate": 2.5438981562774368e-05,
      "loss": 0.0,
      "step": 3357
    },
    {
      "epoch": 0.9827333918642084,
      "grad_norm": 0.0004128154832869768,
      "learning_rate": 2.5431665203394793e-05,
      "loss": 0.0,
      "step": 3358
    },
    {
      "epoch": 0.9830260462393913,
      "grad_norm": 0.00048634555423632264,
      "learning_rate": 2.5424348844015217e-05,
      "loss": 0.0,
      "step": 3359
    },
    {
      "epoch": 0.9833187006145742,
      "grad_norm": 0.0010877094464376569,
      "learning_rate": 2.5417032484635645e-05,
      "loss": 0.0,
      "step": 3360
    },
    {
      "epoch": 0.983611354989757,
      "grad_norm": 0.00032809775439091027,
      "learning_rate": 2.5409716125256073e-05,
      "loss": 0.0,
      "step": 3361
    },
    {
      "epoch": 0.98390400936494,
      "grad_norm": 0.000183176773134619,
      "learning_rate": 2.54023997658765e-05,
      "loss": 0.0,
      "step": 3362
    },
    {
      "epoch": 0.9841966637401229,
      "grad_norm": 0.0011083828285336494,
      "learning_rate": 2.539508340649693e-05,
      "loss": 0.0,
      "step": 3363
    },
    {
      "epoch": 0.9844893181153058,
      "grad_norm": 0.00015416370297316462,
      "learning_rate": 2.5387767047117357e-05,
      "loss": 0.0,
      "step": 3364
    },
    {
      "epoch": 0.9847819724904887,
      "grad_norm": 0.00027531859814189374,
      "learning_rate": 2.5380450687737785e-05,
      "loss": 0.0,
      "step": 3365
    },
    {
      "epoch": 0.9850746268656716,
      "grad_norm": 0.00017930881585925817,
      "learning_rate": 2.537313432835821e-05,
      "loss": 0.0,
      "step": 3366
    },
    {
      "epoch": 0.9853672812408546,
      "grad_norm": 0.0003319485695101321,
      "learning_rate": 2.5365817968978638e-05,
      "loss": 0.0,
      "step": 3367
    },
    {
      "epoch": 0.9856599356160375,
      "grad_norm": 0.0010585540439933538,
      "learning_rate": 2.5358501609599066e-05,
      "loss": 0.0,
      "step": 3368
    },
    {
      "epoch": 0.9859525899912204,
      "grad_norm": 0.0002888018498197198,
      "learning_rate": 2.5351185250219493e-05,
      "loss": 0.0,
      "step": 3369
    },
    {
      "epoch": 0.9862452443664033,
      "grad_norm": 0.0015260048676282167,
      "learning_rate": 2.5343868890839918e-05,
      "loss": 0.0,
      "step": 3370
    },
    {
      "epoch": 0.9865378987415862,
      "grad_norm": 0.00014370719145517796,
      "learning_rate": 2.5336552531460346e-05,
      "loss": 0.0,
      "step": 3371
    },
    {
      "epoch": 0.9868305531167691,
      "grad_norm": 0.0002880328393075615,
      "learning_rate": 2.5329236172080774e-05,
      "loss": 0.0,
      "step": 3372
    },
    {
      "epoch": 0.987123207491952,
      "grad_norm": 0.0006448253407143056,
      "learning_rate": 2.5321919812701202e-05,
      "loss": 0.0,
      "step": 3373
    },
    {
      "epoch": 0.9874158618671349,
      "grad_norm": 0.00018244732927996665,
      "learning_rate": 2.5314603453321626e-05,
      "loss": 0.0,
      "step": 3374
    },
    {
      "epoch": 0.9877085162423178,
      "grad_norm": 0.00013968031271360815,
      "learning_rate": 2.5307287093942054e-05,
      "loss": 0.0,
      "step": 3375
    },
    {
      "epoch": 0.9880011706175007,
      "grad_norm": 0.00027053322992287576,
      "learning_rate": 2.5299970734562482e-05,
      "loss": 0.0,
      "step": 3376
    },
    {
      "epoch": 0.9882938249926836,
      "grad_norm": 1.9271124601364136,
      "learning_rate": 2.529265437518291e-05,
      "loss": 0.0028,
      "step": 3377
    },
    {
      "epoch": 0.9885864793678666,
      "grad_norm": 0.0356573611497879,
      "learning_rate": 2.5285338015803338e-05,
      "loss": 0.0002,
      "step": 3378
    },
    {
      "epoch": 0.9888791337430495,
      "grad_norm": 0.00020806578686460853,
      "learning_rate": 2.5278021656423766e-05,
      "loss": 0.0,
      "step": 3379
    },
    {
      "epoch": 0.9891717881182324,
      "grad_norm": 0.00011854932381538674,
      "learning_rate": 2.527070529704419e-05,
      "loss": 0.0,
      "step": 3380
    },
    {
      "epoch": 0.9894644424934153,
      "grad_norm": 15.745497703552246,
      "learning_rate": 2.526338893766462e-05,
      "loss": 0.0762,
      "step": 3381
    },
    {
      "epoch": 0.9897570968685981,
      "grad_norm": 0.0009089075610972941,
      "learning_rate": 2.5256072578285043e-05,
      "loss": 0.0,
      "step": 3382
    },
    {
      "epoch": 0.9900497512437811,
      "grad_norm": 0.000981273828074336,
      "learning_rate": 2.524875621890547e-05,
      "loss": 0.0,
      "step": 3383
    },
    {
      "epoch": 0.990342405618964,
      "grad_norm": 0.000403301470214501,
      "learning_rate": 2.52414398595259e-05,
      "loss": 0.0,
      "step": 3384
    },
    {
      "epoch": 0.9906350599941469,
      "grad_norm": 0.00026050262385979295,
      "learning_rate": 2.5234123500146327e-05,
      "loss": 0.0,
      "step": 3385
    },
    {
      "epoch": 0.9909277143693298,
      "grad_norm": 0.00023542552662547678,
      "learning_rate": 2.5226807140766755e-05,
      "loss": 0.0,
      "step": 3386
    },
    {
      "epoch": 0.9912203687445127,
      "grad_norm": 0.0013741771690547466,
      "learning_rate": 2.5219490781387183e-05,
      "loss": 0.0,
      "step": 3387
    },
    {
      "epoch": 0.9915130231196957,
      "grad_norm": 0.0006651601288467646,
      "learning_rate": 2.521217442200761e-05,
      "loss": 0.0,
      "step": 3388
    },
    {
      "epoch": 0.9918056774948786,
      "grad_norm": 0.12294548004865646,
      "learning_rate": 2.520485806262804e-05,
      "loss": 0.0005,
      "step": 3389
    },
    {
      "epoch": 0.9920983318700615,
      "grad_norm": 0.016826776787638664,
      "learning_rate": 2.5197541703248467e-05,
      "loss": 0.0001,
      "step": 3390
    },
    {
      "epoch": 0.9923909862452444,
      "grad_norm": 12.275745391845703,
      "learning_rate": 2.5190225343868888e-05,
      "loss": 0.1578,
      "step": 3391
    },
    {
      "epoch": 0.9926836406204272,
      "grad_norm": 0.0016533538000658154,
      "learning_rate": 2.5182908984489316e-05,
      "loss": 0.0,
      "step": 3392
    },
    {
      "epoch": 0.9929762949956101,
      "grad_norm": 10.942267417907715,
      "learning_rate": 2.5175592625109744e-05,
      "loss": 0.2113,
      "step": 3393
    },
    {
      "epoch": 0.9932689493707931,
      "grad_norm": 0.0032125513534992933,
      "learning_rate": 2.5168276265730172e-05,
      "loss": 0.0,
      "step": 3394
    },
    {
      "epoch": 0.993561603745976,
      "grad_norm": 0.0002497293462511152,
      "learning_rate": 2.51609599063506e-05,
      "loss": 0.0,
      "step": 3395
    },
    {
      "epoch": 0.9938542581211589,
      "grad_norm": 0.0044729625806212425,
      "learning_rate": 2.5153643546971028e-05,
      "loss": 0.0,
      "step": 3396
    },
    {
      "epoch": 0.9941469124963418,
      "grad_norm": 0.012294032610952854,
      "learning_rate": 2.5146327187591456e-05,
      "loss": 0.0001,
      "step": 3397
    },
    {
      "epoch": 0.9944395668715247,
      "grad_norm": 0.008075743913650513,
      "learning_rate": 2.5139010828211884e-05,
      "loss": 0.0,
      "step": 3398
    },
    {
      "epoch": 0.9947322212467077,
      "grad_norm": 0.0002726623206399381,
      "learning_rate": 2.513169446883231e-05,
      "loss": 0.0,
      "step": 3399
    },
    {
      "epoch": 0.9950248756218906,
      "grad_norm": 0.0036799306981265545,
      "learning_rate": 2.512437810945274e-05,
      "loss": 0.0,
      "step": 3400
    },
    {
      "epoch": 0.9953175299970735,
      "grad_norm": 0.0004843377973884344,
      "learning_rate": 2.5117061750073168e-05,
      "loss": 0.0,
      "step": 3401
    },
    {
      "epoch": 0.9956101843722563,
      "grad_norm": 0.2228137105703354,
      "learning_rate": 2.510974539069359e-05,
      "loss": 0.0008,
      "step": 3402
    },
    {
      "epoch": 0.9959028387474392,
      "grad_norm": 0.0007371075917035341,
      "learning_rate": 2.5102429031314017e-05,
      "loss": 0.0,
      "step": 3403
    },
    {
      "epoch": 0.9961954931226222,
      "grad_norm": 0.0014641970628872514,
      "learning_rate": 2.5095112671934445e-05,
      "loss": 0.0,
      "step": 3404
    },
    {
      "epoch": 0.9964881474978051,
      "grad_norm": 0.0005017340299673378,
      "learning_rate": 2.5087796312554873e-05,
      "loss": 0.0,
      "step": 3405
    },
    {
      "epoch": 0.996780801872988,
      "grad_norm": 1.8678332567214966,
      "learning_rate": 2.50804799531753e-05,
      "loss": 0.0023,
      "step": 3406
    },
    {
      "epoch": 0.9970734562481709,
      "grad_norm": 0.000548258947674185,
      "learning_rate": 2.507316359379573e-05,
      "loss": 0.0,
      "step": 3407
    },
    {
      "epoch": 0.9973661106233538,
      "grad_norm": 0.0020072218030691147,
      "learning_rate": 2.5065847234416156e-05,
      "loss": 0.0,
      "step": 3408
    },
    {
      "epoch": 0.9976587649985367,
      "grad_norm": 0.004497798625379801,
      "learning_rate": 2.5058530875036584e-05,
      "loss": 0.0001,
      "step": 3409
    },
    {
      "epoch": 0.9979514193737197,
      "grad_norm": 0.050592996180057526,
      "learning_rate": 2.5051214515657012e-05,
      "loss": 0.0003,
      "step": 3410
    },
    {
      "epoch": 0.9982440737489026,
      "grad_norm": 0.0006552187842316926,
      "learning_rate": 2.504389815627744e-05,
      "loss": 0.0,
      "step": 3411
    },
    {
      "epoch": 0.9985367281240854,
      "grad_norm": 11.620914459228516,
      "learning_rate": 2.503658179689786e-05,
      "loss": 0.158,
      "step": 3412
    },
    {
      "epoch": 0.9988293824992683,
      "grad_norm": 0.0039320336654782295,
      "learning_rate": 2.502926543751829e-05,
      "loss": 0.0001,
      "step": 3413
    },
    {
      "epoch": 0.9991220368744512,
      "grad_norm": 10.041006088256836,
      "learning_rate": 2.5021949078138717e-05,
      "loss": 0.016,
      "step": 3414
    },
    {
      "epoch": 0.9994146912496342,
      "grad_norm": 1.3903892040252686,
      "learning_rate": 2.5014632718759145e-05,
      "loss": 0.0032,
      "step": 3415
    },
    {
      "epoch": 0.9997073456248171,
      "grad_norm": 0.002409636974334717,
      "learning_rate": 2.5007316359379573e-05,
      "loss": 0.0,
      "step": 3416
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.0005303791258484125,
      "learning_rate": 2.5e-05,
      "loss": 0.0,
      "step": 3417
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9995883757306331,
      "eval_f1": 0.9997716790721037,
      "eval_loss": 0.002137020230293274,
      "eval_precision": 0.9997260273972602,
      "eval_recall": 0.9998173349164308,
      "eval_runtime": 719.3196,
      "eval_samples_per_second": 16.887,
      "eval_steps_per_second": 1.057,
      "step": 3417
    },
    {
      "epoch": 1.0002926543751829,
      "grad_norm": 0.0005364635726436973,
      "learning_rate": 2.499268364062043e-05,
      "loss": 0.0,
      "step": 3418
    },
    {
      "epoch": 1.0005853087503658,
      "grad_norm": 0.000856778584420681,
      "learning_rate": 2.4985367281240857e-05,
      "loss": 0.0,
      "step": 3419
    },
    {
      "epoch": 1.0008779631255487,
      "grad_norm": 0.05647166445851326,
      "learning_rate": 2.497805092186128e-05,
      "loss": 0.0002,
      "step": 3420
    },
    {
      "epoch": 1.0011706175007316,
      "grad_norm": 0.002607455011457205,
      "learning_rate": 2.497073456248171e-05,
      "loss": 0.0,
      "step": 3421
    },
    {
      "epoch": 1.0014632718759144,
      "grad_norm": 0.0004365872300695628,
      "learning_rate": 2.4963418203102138e-05,
      "loss": 0.0,
      "step": 3422
    },
    {
      "epoch": 1.0017559262510976,
      "grad_norm": 2.8085110187530518,
      "learning_rate": 2.4956101843722566e-05,
      "loss": 0.0029,
      "step": 3423
    },
    {
      "epoch": 1.0020485806262804,
      "grad_norm": 0.001110740122385323,
      "learning_rate": 2.4948785484342993e-05,
      "loss": 0.0,
      "step": 3424
    },
    {
      "epoch": 1.0023412350014633,
      "grad_norm": 1.543617844581604,
      "learning_rate": 2.4941469124963418e-05,
      "loss": 0.0059,
      "step": 3425
    },
    {
      "epoch": 1.0026338893766462,
      "grad_norm": 0.0005496027297340333,
      "learning_rate": 2.4934152765583846e-05,
      "loss": 0.0,
      "step": 3426
    },
    {
      "epoch": 1.002926543751829,
      "grad_norm": 0.0006246971315704286,
      "learning_rate": 2.4926836406204274e-05,
      "loss": 0.0,
      "step": 3427
    },
    {
      "epoch": 1.003219198127012,
      "grad_norm": 0.001402894384227693,
      "learning_rate": 2.4919520046824702e-05,
      "loss": 0.0,
      "step": 3428
    },
    {
      "epoch": 1.0035118525021949,
      "grad_norm": 0.025198372080922127,
      "learning_rate": 2.491220368744513e-05,
      "loss": 0.0001,
      "step": 3429
    },
    {
      "epoch": 1.0038045068773778,
      "grad_norm": 0.001699879881925881,
      "learning_rate": 2.4904887328065558e-05,
      "loss": 0.0,
      "step": 3430
    },
    {
      "epoch": 1.0040971612525607,
      "grad_norm": 0.004124501720070839,
      "learning_rate": 2.4897570968685982e-05,
      "loss": 0.0,
      "step": 3431
    },
    {
      "epoch": 1.0043898156277435,
      "grad_norm": 0.0007139227818697691,
      "learning_rate": 2.489025460930641e-05,
      "loss": 0.0,
      "step": 3432
    },
    {
      "epoch": 1.0046824700029267,
      "grad_norm": 1.722924828529358,
      "learning_rate": 2.4882938249926838e-05,
      "loss": 0.0085,
      "step": 3433
    },
    {
      "epoch": 1.0049751243781095,
      "grad_norm": 0.0007747646304778755,
      "learning_rate": 2.4875621890547266e-05,
      "loss": 0.0,
      "step": 3434
    },
    {
      "epoch": 1.0052677787532924,
      "grad_norm": 0.0024586129002273083,
      "learning_rate": 2.4868305531167694e-05,
      "loss": 0.0,
      "step": 3435
    },
    {
      "epoch": 1.0055604331284753,
      "grad_norm": 0.00048653915291652083,
      "learning_rate": 2.486098917178812e-05,
      "loss": 0.0,
      "step": 3436
    },
    {
      "epoch": 1.0058530875036582,
      "grad_norm": 0.0017318056197836995,
      "learning_rate": 2.4853672812408547e-05,
      "loss": 0.0,
      "step": 3437
    },
    {
      "epoch": 1.006145741878841,
      "grad_norm": 0.024242956191301346,
      "learning_rate": 2.4846356453028975e-05,
      "loss": 0.0001,
      "step": 3438
    },
    {
      "epoch": 1.006438396254024,
      "grad_norm": 0.0004281763976905495,
      "learning_rate": 2.4839040093649403e-05,
      "loss": 0.0,
      "step": 3439
    },
    {
      "epoch": 1.0067310506292069,
      "grad_norm": 0.00036549000651575625,
      "learning_rate": 2.483172373426983e-05,
      "loss": 0.0,
      "step": 3440
    },
    {
      "epoch": 1.0070237050043898,
      "grad_norm": 0.0002772965235635638,
      "learning_rate": 2.4824407374890255e-05,
      "loss": 0.0,
      "step": 3441
    },
    {
      "epoch": 1.0073163593795726,
      "grad_norm": 0.00019915489247068763,
      "learning_rate": 2.4817091015510683e-05,
      "loss": 0.0,
      "step": 3442
    },
    {
      "epoch": 1.0076090137547555,
      "grad_norm": 0.0008135453681461513,
      "learning_rate": 2.480977465613111e-05,
      "loss": 0.0,
      "step": 3443
    },
    {
      "epoch": 1.0079016681299386,
      "grad_norm": 0.0006470750086009502,
      "learning_rate": 2.480245829675154e-05,
      "loss": 0.0,
      "step": 3444
    },
    {
      "epoch": 1.0081943225051215,
      "grad_norm": 0.00011369881394784898,
      "learning_rate": 2.4795141937371967e-05,
      "loss": 0.0,
      "step": 3445
    },
    {
      "epoch": 1.0084869768803044,
      "grad_norm": 0.0005042713601142168,
      "learning_rate": 2.4787825577992395e-05,
      "loss": 0.0,
      "step": 3446
    },
    {
      "epoch": 1.0087796312554873,
      "grad_norm": 0.00038000199128873646,
      "learning_rate": 2.478050921861282e-05,
      "loss": 0.0,
      "step": 3447
    },
    {
      "epoch": 1.0090722856306702,
      "grad_norm": 0.0016200197860598564,
      "learning_rate": 2.4773192859233247e-05,
      "loss": 0.0,
      "step": 3448
    },
    {
      "epoch": 1.009364940005853,
      "grad_norm": 3.1312618255615234,
      "learning_rate": 2.4765876499853675e-05,
      "loss": 0.3212,
      "step": 3449
    },
    {
      "epoch": 1.009657594381036,
      "grad_norm": 0.0004399172612465918,
      "learning_rate": 2.4758560140474103e-05,
      "loss": 0.0,
      "step": 3450
    },
    {
      "epoch": 1.0099502487562189,
      "grad_norm": 0.001580632757395506,
      "learning_rate": 2.475124378109453e-05,
      "loss": 0.0,
      "step": 3451
    },
    {
      "epoch": 1.0102429031314017,
      "grad_norm": 0.0017576804384589195,
      "learning_rate": 2.4743927421714956e-05,
      "loss": 0.0,
      "step": 3452
    },
    {
      "epoch": 1.0105355575065846,
      "grad_norm": 0.0011914470233023167,
      "learning_rate": 2.4736611062335384e-05,
      "loss": 0.0,
      "step": 3453
    },
    {
      "epoch": 1.0108282118817675,
      "grad_norm": 0.0014104278525337577,
      "learning_rate": 2.472929470295581e-05,
      "loss": 0.0,
      "step": 3454
    },
    {
      "epoch": 1.0111208662569506,
      "grad_norm": 0.0016562313539907336,
      "learning_rate": 2.472197834357624e-05,
      "loss": 0.0,
      "step": 3455
    },
    {
      "epoch": 1.0114135206321335,
      "grad_norm": 0.0008212103857658803,
      "learning_rate": 2.4714661984196668e-05,
      "loss": 0.0,
      "step": 3456
    },
    {
      "epoch": 1.0117061750073164,
      "grad_norm": 0.002704160986468196,
      "learning_rate": 2.4707345624817092e-05,
      "loss": 0.0001,
      "step": 3457
    },
    {
      "epoch": 1.0119988293824993,
      "grad_norm": 0.00468985503539443,
      "learning_rate": 2.470002926543752e-05,
      "loss": 0.0001,
      "step": 3458
    },
    {
      "epoch": 1.0122914837576822,
      "grad_norm": 0.0041338070295751095,
      "learning_rate": 2.4692712906057948e-05,
      "loss": 0.0001,
      "step": 3459
    },
    {
      "epoch": 1.012584138132865,
      "grad_norm": 0.0030151098035275936,
      "learning_rate": 2.4685396546678376e-05,
      "loss": 0.0001,
      "step": 3460
    },
    {
      "epoch": 1.012876792508048,
      "grad_norm": 0.007817713543772697,
      "learning_rate": 2.46780801872988e-05,
      "loss": 0.0001,
      "step": 3461
    },
    {
      "epoch": 1.0131694468832309,
      "grad_norm": 0.0026054515037685633,
      "learning_rate": 2.467076382791923e-05,
      "loss": 0.0001,
      "step": 3462
    },
    {
      "epoch": 1.0134621012584137,
      "grad_norm": 0.0018012281507253647,
      "learning_rate": 2.4663447468539656e-05,
      "loss": 0.0,
      "step": 3463
    },
    {
      "epoch": 1.0137547556335966,
      "grad_norm": 0.009095782414078712,
      "learning_rate": 2.4656131109160084e-05,
      "loss": 0.0002,
      "step": 3464
    },
    {
      "epoch": 1.0140474100087797,
      "grad_norm": 0.010534383356571198,
      "learning_rate": 2.464881474978051e-05,
      "loss": 0.0002,
      "step": 3465
    },
    {
      "epoch": 1.0143400643839626,
      "grad_norm": 0.002322286134585738,
      "learning_rate": 2.4641498390400937e-05,
      "loss": 0.0001,
      "step": 3466
    },
    {
      "epoch": 1.0146327187591455,
      "grad_norm": 0.007303439546376467,
      "learning_rate": 2.4634182031021365e-05,
      "loss": 0.0001,
      "step": 3467
    },
    {
      "epoch": 1.0149253731343284,
      "grad_norm": 0.010205356404185295,
      "learning_rate": 2.4626865671641793e-05,
      "loss": 0.0002,
      "step": 3468
    },
    {
      "epoch": 1.0152180275095113,
      "grad_norm": 0.00949253048747778,
      "learning_rate": 2.4619549312262217e-05,
      "loss": 0.0002,
      "step": 3469
    },
    {
      "epoch": 1.0155106818846942,
      "grad_norm": 0.0067353821359574795,
      "learning_rate": 2.4612232952882645e-05,
      "loss": 0.0002,
      "step": 3470
    },
    {
      "epoch": 1.015803336259877,
      "grad_norm": 0.012329000979661942,
      "learning_rate": 2.4604916593503073e-05,
      "loss": 0.0002,
      "step": 3471
    },
    {
      "epoch": 1.01609599063506,
      "grad_norm": 0.005563882179558277,
      "learning_rate": 2.45976002341235e-05,
      "loss": 0.0001,
      "step": 3472
    },
    {
      "epoch": 1.0163886450102428,
      "grad_norm": 0.016726041212677956,
      "learning_rate": 2.4590283874743926e-05,
      "loss": 0.0003,
      "step": 3473
    },
    {
      "epoch": 1.0166812993854257,
      "grad_norm": 8.889880180358887,
      "learning_rate": 2.4582967515364354e-05,
      "loss": 0.0333,
      "step": 3474
    },
    {
      "epoch": 1.0169739537606086,
      "grad_norm": 0.014499545097351074,
      "learning_rate": 2.457565115598478e-05,
      "loss": 0.0003,
      "step": 3475
    },
    {
      "epoch": 1.0172666081357917,
      "grad_norm": 0.005111265927553177,
      "learning_rate": 2.456833479660521e-05,
      "loss": 0.0001,
      "step": 3476
    },
    {
      "epoch": 1.0175592625109746,
      "grad_norm": 0.00333440606482327,
      "learning_rate": 2.4561018437225638e-05,
      "loss": 0.0001,
      "step": 3477
    },
    {
      "epoch": 1.0178519168861575,
      "grad_norm": 0.00515385065227747,
      "learning_rate": 2.4553702077846066e-05,
      "loss": 0.0001,
      "step": 3478
    },
    {
      "epoch": 1.0181445712613404,
      "grad_norm": 0.009151050820946693,
      "learning_rate": 2.454638571846649e-05,
      "loss": 0.0002,
      "step": 3479
    },
    {
      "epoch": 1.0184372256365233,
      "grad_norm": 0.010349135845899582,
      "learning_rate": 2.4539069359086918e-05,
      "loss": 0.0002,
      "step": 3480
    },
    {
      "epoch": 1.0187298800117062,
      "grad_norm": 0.0076150717213749886,
      "learning_rate": 2.4531752999707346e-05,
      "loss": 0.0001,
      "step": 3481
    },
    {
      "epoch": 1.019022534386889,
      "grad_norm": 0.007438007742166519,
      "learning_rate": 2.4524436640327774e-05,
      "loss": 0.0002,
      "step": 3482
    },
    {
      "epoch": 1.019315188762072,
      "grad_norm": 0.008855358697474003,
      "learning_rate": 2.4517120280948202e-05,
      "loss": 0.0002,
      "step": 3483
    },
    {
      "epoch": 1.0196078431372548,
      "grad_norm": 0.004839539993554354,
      "learning_rate": 2.4509803921568626e-05,
      "loss": 0.0001,
      "step": 3484
    },
    {
      "epoch": 1.0199004975124377,
      "grad_norm": 0.02208811230957508,
      "learning_rate": 2.4502487562189054e-05,
      "loss": 0.0001,
      "step": 3485
    },
    {
      "epoch": 1.0201931518876206,
      "grad_norm": 0.004208148457109928,
      "learning_rate": 2.4495171202809482e-05,
      "loss": 0.0001,
      "step": 3486
    },
    {
      "epoch": 1.0204858062628037,
      "grad_norm": 0.010331617668271065,
      "learning_rate": 2.448785484342991e-05,
      "loss": 0.0002,
      "step": 3487
    },
    {
      "epoch": 1.0207784606379866,
      "grad_norm": 0.005608166567981243,
      "learning_rate": 2.4480538484050338e-05,
      "loss": 0.0001,
      "step": 3488
    },
    {
      "epoch": 1.0210711150131695,
      "grad_norm": 9.352372169494629,
      "learning_rate": 2.4473222124670763e-05,
      "loss": 0.0126,
      "step": 3489
    },
    {
      "epoch": 1.0213637693883524,
      "grad_norm": 0.0057179625146090984,
      "learning_rate": 2.446590576529119e-05,
      "loss": 0.0001,
      "step": 3490
    },
    {
      "epoch": 1.0216564237635353,
      "grad_norm": 0.0024904771707952023,
      "learning_rate": 2.445858940591162e-05,
      "loss": 0.0001,
      "step": 3491
    },
    {
      "epoch": 1.0219490781387182,
      "grad_norm": 0.00714457593858242,
      "learning_rate": 2.4451273046532047e-05,
      "loss": 0.0002,
      "step": 3492
    },
    {
      "epoch": 1.022241732513901,
      "grad_norm": 0.002021880354732275,
      "learning_rate": 2.4443956687152475e-05,
      "loss": 0.0001,
      "step": 3493
    },
    {
      "epoch": 1.022534386889084,
      "grad_norm": 0.005838671233505011,
      "learning_rate": 2.44366403277729e-05,
      "loss": 0.0001,
      "step": 3494
    },
    {
      "epoch": 1.0228270412642668,
      "grad_norm": 0.003814258612692356,
      "learning_rate": 2.4429323968393327e-05,
      "loss": 0.0001,
      "step": 3495
    },
    {
      "epoch": 1.0231196956394497,
      "grad_norm": 0.0028571065049618483,
      "learning_rate": 2.4422007609013755e-05,
      "loss": 0.0001,
      "step": 3496
    },
    {
      "epoch": 1.0234123500146328,
      "grad_norm": 0.004640253726392984,
      "learning_rate": 2.4414691249634183e-05,
      "loss": 0.0001,
      "step": 3497
    },
    {
      "epoch": 1.0237050043898157,
      "grad_norm": 0.004929804243147373,
      "learning_rate": 2.440737489025461e-05,
      "loss": 0.0001,
      "step": 3498
    },
    {
      "epoch": 1.0239976587649986,
      "grad_norm": 0.00255428534001112,
      "learning_rate": 2.440005853087504e-05,
      "loss": 0.0001,
      "step": 3499
    },
    {
      "epoch": 1.0242903131401815,
      "grad_norm": 0.00356631469912827,
      "learning_rate": 2.4392742171495463e-05,
      "loss": 0.0001,
      "step": 3500
    },
    {
      "epoch": 1.0245829675153644,
      "grad_norm": 0.004246738739311695,
      "learning_rate": 2.438542581211589e-05,
      "loss": 0.0001,
      "step": 3501
    },
    {
      "epoch": 1.0248756218905473,
      "grad_norm": 0.0017620133003219962,
      "learning_rate": 2.437810945273632e-05,
      "loss": 0.0,
      "step": 3502
    },
    {
      "epoch": 1.0251682762657301,
      "grad_norm": 0.003564164973795414,
      "learning_rate": 2.4370793093356747e-05,
      "loss": 0.0001,
      "step": 3503
    },
    {
      "epoch": 1.025460930640913,
      "grad_norm": 0.00230700196698308,
      "learning_rate": 2.4363476733977175e-05,
      "loss": 0.0001,
      "step": 3504
    },
    {
      "epoch": 1.025753585016096,
      "grad_norm": 0.0015987782971933484,
      "learning_rate": 2.43561603745976e-05,
      "loss": 0.0,
      "step": 3505
    },
    {
      "epoch": 1.0260462393912788,
      "grad_norm": 0.0009958547307178378,
      "learning_rate": 2.4348844015218028e-05,
      "loss": 0.0,
      "step": 3506
    },
    {
      "epoch": 1.0263388937664617,
      "grad_norm": 0.0028785394970327616,
      "learning_rate": 2.4341527655838456e-05,
      "loss": 0.0001,
      "step": 3507
    },
    {
      "epoch": 1.0266315481416448,
      "grad_norm": 0.002653158036991954,
      "learning_rate": 2.4334211296458884e-05,
      "loss": 0.0001,
      "step": 3508
    },
    {
      "epoch": 1.0269242025168277,
      "grad_norm": 0.0009308578446507454,
      "learning_rate": 2.432689493707931e-05,
      "loss": 0.0,
      "step": 3509
    },
    {
      "epoch": 1.0272168568920106,
      "grad_norm": 0.004706493113189936,
      "learning_rate": 2.4319578577699736e-05,
      "loss": 0.0001,
      "step": 3510
    },
    {
      "epoch": 1.0275095112671935,
      "grad_norm": 0.002086702734231949,
      "learning_rate": 2.4312262218320164e-05,
      "loss": 0.0,
      "step": 3511
    },
    {
      "epoch": 1.0278021656423764,
      "grad_norm": 0.003446651855483651,
      "learning_rate": 2.4304945858940592e-05,
      "loss": 0.0001,
      "step": 3512
    },
    {
      "epoch": 1.0280948200175593,
      "grad_norm": 0.0022666268050670624,
      "learning_rate": 2.429762949956102e-05,
      "loss": 0.0001,
      "step": 3513
    },
    {
      "epoch": 1.0283874743927421,
      "grad_norm": 0.0011834276374429464,
      "learning_rate": 2.4290313140181448e-05,
      "loss": 0.0,
      "step": 3514
    },
    {
      "epoch": 1.028680128767925,
      "grad_norm": 0.0007315911934711039,
      "learning_rate": 2.4282996780801876e-05,
      "loss": 0.0,
      "step": 3515
    },
    {
      "epoch": 1.028972783143108,
      "grad_norm": 0.0026782916393131018,
      "learning_rate": 2.42756804214223e-05,
      "loss": 0.0001,
      "step": 3516
    },
    {
      "epoch": 1.0292654375182908,
      "grad_norm": 0.0027841634582728148,
      "learning_rate": 2.426836406204273e-05,
      "loss": 0.0001,
      "step": 3517
    },
    {
      "epoch": 1.029558091893474,
      "grad_norm": 0.001004671910777688,
      "learning_rate": 2.4261047702663156e-05,
      "loss": 0.0,
      "step": 3518
    },
    {
      "epoch": 1.0298507462686568,
      "grad_norm": 0.0021532170940190554,
      "learning_rate": 2.4253731343283584e-05,
      "loss": 0.0001,
      "step": 3519
    },
    {
      "epoch": 1.0301434006438397,
      "grad_norm": 0.0016680386615917087,
      "learning_rate": 2.4246414983904012e-05,
      "loss": 0.0,
      "step": 3520
    },
    {
      "epoch": 1.0304360550190226,
      "grad_norm": 0.004606564529240131,
      "learning_rate": 2.4239098624524437e-05,
      "loss": 0.0001,
      "step": 3521
    },
    {
      "epoch": 1.0307287093942055,
      "grad_norm": 0.0014813424786552787,
      "learning_rate": 2.4231782265144865e-05,
      "loss": 0.0,
      "step": 3522
    },
    {
      "epoch": 1.0310213637693884,
      "grad_norm": 0.0005702835042029619,
      "learning_rate": 2.4224465905765293e-05,
      "loss": 0.0,
      "step": 3523
    },
    {
      "epoch": 1.0313140181445712,
      "grad_norm": 0.002096495358273387,
      "learning_rate": 2.421714954638572e-05,
      "loss": 0.0,
      "step": 3524
    },
    {
      "epoch": 1.0316066725197541,
      "grad_norm": 0.0034722215496003628,
      "learning_rate": 2.420983318700615e-05,
      "loss": 0.0001,
      "step": 3525
    },
    {
      "epoch": 1.031899326894937,
      "grad_norm": 0.0027094169054180384,
      "learning_rate": 2.4202516827626573e-05,
      "loss": 0.0001,
      "step": 3526
    },
    {
      "epoch": 1.03219198127012,
      "grad_norm": 0.002960802521556616,
      "learning_rate": 2.4195200468247e-05,
      "loss": 0.0001,
      "step": 3527
    },
    {
      "epoch": 1.0324846356453028,
      "grad_norm": 0.0020208009518682957,
      "learning_rate": 2.418788410886743e-05,
      "loss": 0.0001,
      "step": 3528
    },
    {
      "epoch": 1.032777290020486,
      "grad_norm": 0.0008873249753378332,
      "learning_rate": 2.4180567749487857e-05,
      "loss": 0.0,
      "step": 3529
    },
    {
      "epoch": 1.0330699443956688,
      "grad_norm": 0.0014568967744708061,
      "learning_rate": 2.4173251390108285e-05,
      "loss": 0.0,
      "step": 3530
    },
    {
      "epoch": 1.0333625987708517,
      "grad_norm": 0.004252336453646421,
      "learning_rate": 2.4165935030728713e-05,
      "loss": 0.0001,
      "step": 3531
    },
    {
      "epoch": 1.0336552531460346,
      "grad_norm": 0.0038973314221948385,
      "learning_rate": 2.4158618671349138e-05,
      "loss": 0.0001,
      "step": 3532
    },
    {
      "epoch": 1.0339479075212175,
      "grad_norm": 0.0010255716042593122,
      "learning_rate": 2.4151302311969566e-05,
      "loss": 0.0,
      "step": 3533
    },
    {
      "epoch": 1.0342405618964003,
      "grad_norm": 0.0017912236507982016,
      "learning_rate": 2.4143985952589993e-05,
      "loss": 0.0,
      "step": 3534
    },
    {
      "epoch": 1.0345332162715832,
      "grad_norm": 0.0040057022124528885,
      "learning_rate": 2.413666959321042e-05,
      "loss": 0.0001,
      "step": 3535
    },
    {
      "epoch": 1.0348258706467661,
      "grad_norm": 0.001981346635147929,
      "learning_rate": 2.412935323383085e-05,
      "loss": 0.0,
      "step": 3536
    },
    {
      "epoch": 1.035118525021949,
      "grad_norm": 0.0009333692723885179,
      "learning_rate": 2.4122036874451274e-05,
      "loss": 0.0,
      "step": 3537
    },
    {
      "epoch": 1.035411179397132,
      "grad_norm": 0.0011883610859513283,
      "learning_rate": 2.4114720515071702e-05,
      "loss": 0.0,
      "step": 3538
    },
    {
      "epoch": 1.035703833772315,
      "grad_norm": 0.002273314166814089,
      "learning_rate": 2.410740415569213e-05,
      "loss": 0.0001,
      "step": 3539
    },
    {
      "epoch": 1.035996488147498,
      "grad_norm": 0.006086126435548067,
      "learning_rate": 2.4100087796312558e-05,
      "loss": 0.0001,
      "step": 3540
    },
    {
      "epoch": 1.0362891425226808,
      "grad_norm": 4.4004974365234375,
      "learning_rate": 2.4092771436932986e-05,
      "loss": 0.149,
      "step": 3541
    },
    {
      "epoch": 1.0365817968978637,
      "grad_norm": 0.008607176132500172,
      "learning_rate": 2.408545507755341e-05,
      "loss": 0.0002,
      "step": 3542
    },
    {
      "epoch": 1.0368744512730466,
      "grad_norm": 0.0027010750491172075,
      "learning_rate": 2.4078138718173838e-05,
      "loss": 0.0001,
      "step": 3543
    },
    {
      "epoch": 1.0371671056482294,
      "grad_norm": 0.0015393191715702415,
      "learning_rate": 2.4070822358794266e-05,
      "loss": 0.0,
      "step": 3544
    },
    {
      "epoch": 1.0374597600234123,
      "grad_norm": 0.0048315017484128475,
      "learning_rate": 2.4063505999414694e-05,
      "loss": 0.0001,
      "step": 3545
    },
    {
      "epoch": 1.0377524143985952,
      "grad_norm": 0.0029096445068717003,
      "learning_rate": 2.4056189640035122e-05,
      "loss": 0.0001,
      "step": 3546
    },
    {
      "epoch": 1.0380450687737781,
      "grad_norm": 0.1986430436372757,
      "learning_rate": 2.4048873280655547e-05,
      "loss": 0.0013,
      "step": 3547
    },
    {
      "epoch": 1.038337723148961,
      "grad_norm": 0.0035794368013739586,
      "learning_rate": 2.4041556921275975e-05,
      "loss": 0.0001,
      "step": 3548
    },
    {
      "epoch": 1.0386303775241439,
      "grad_norm": 0.01305379904806614,
      "learning_rate": 2.4034240561896403e-05,
      "loss": 0.0002,
      "step": 3549
    },
    {
      "epoch": 1.038923031899327,
      "grad_norm": 0.00715178856626153,
      "learning_rate": 2.402692420251683e-05,
      "loss": 0.0001,
      "step": 3550
    },
    {
      "epoch": 1.0392156862745099,
      "grad_norm": 0.0010849026730284095,
      "learning_rate": 2.401960784313726e-05,
      "loss": 0.0,
      "step": 3551
    },
    {
      "epoch": 1.0395083406496928,
      "grad_norm": 0.013471659272909164,
      "learning_rate": 2.4012291483757683e-05,
      "loss": 0.0003,
      "step": 3552
    },
    {
      "epoch": 1.0398009950248757,
      "grad_norm": 0.031217027455568314,
      "learning_rate": 2.400497512437811e-05,
      "loss": 0.0003,
      "step": 3553
    },
    {
      "epoch": 1.0400936494000586,
      "grad_norm": 0.005862652789801359,
      "learning_rate": 2.399765876499854e-05,
      "loss": 0.0001,
      "step": 3554
    },
    {
      "epoch": 1.0403863037752414,
      "grad_norm": 0.028307737782597542,
      "learning_rate": 2.3990342405618967e-05,
      "loss": 0.0004,
      "step": 3555
    },
    {
      "epoch": 1.0406789581504243,
      "grad_norm": 0.10708467662334442,
      "learning_rate": 2.398302604623939e-05,
      "loss": 0.001,
      "step": 3556
    },
    {
      "epoch": 1.0409716125256072,
      "grad_norm": 0.11359408497810364,
      "learning_rate": 2.397570968685982e-05,
      "loss": 0.001,
      "step": 3557
    },
    {
      "epoch": 1.04126426690079,
      "grad_norm": 0.021371543407440186,
      "learning_rate": 2.3968393327480247e-05,
      "loss": 0.0003,
      "step": 3558
    },
    {
      "epoch": 1.041556921275973,
      "grad_norm": 0.0009941854514181614,
      "learning_rate": 2.3961076968100675e-05,
      "loss": 0.0,
      "step": 3559
    },
    {
      "epoch": 1.0418495756511559,
      "grad_norm": 0.003969173412770033,
      "learning_rate": 2.39537606087211e-05,
      "loss": 0.0001,
      "step": 3560
    },
    {
      "epoch": 1.042142230026339,
      "grad_norm": 0.011651767417788506,
      "learning_rate": 2.3946444249341528e-05,
      "loss": 0.0002,
      "step": 3561
    },
    {
      "epoch": 1.0424348844015219,
      "grad_norm": 0.0031273127533495426,
      "learning_rate": 2.3939127889961956e-05,
      "loss": 0.0001,
      "step": 3562
    },
    {
      "epoch": 1.0427275387767048,
      "grad_norm": 0.0070342631079256535,
      "learning_rate": 2.3931811530582384e-05,
      "loss": 0.0001,
      "step": 3563
    },
    {
      "epoch": 1.0430201931518877,
      "grad_norm": 0.018413899466395378,
      "learning_rate": 2.3924495171202808e-05,
      "loss": 0.0002,
      "step": 3564
    },
    {
      "epoch": 1.0433128475270705,
      "grad_norm": 0.005958537105470896,
      "learning_rate": 2.3917178811823236e-05,
      "loss": 0.0001,
      "step": 3565
    },
    {
      "epoch": 1.0436055019022534,
      "grad_norm": 0.0010462752543389797,
      "learning_rate": 2.3909862452443664e-05,
      "loss": 0.0,
      "step": 3566
    },
    {
      "epoch": 1.0438981562774363,
      "grad_norm": 0.0037645921111106873,
      "learning_rate": 2.3902546093064092e-05,
      "loss": 0.0001,
      "step": 3567
    },
    {
      "epoch": 1.0441908106526192,
      "grad_norm": 0.0006369265611283481,
      "learning_rate": 2.389522973368452e-05,
      "loss": 0.0,
      "step": 3568
    },
    {
      "epoch": 1.044483465027802,
      "grad_norm": 0.004025363363325596,
      "learning_rate": 2.3887913374304945e-05,
      "loss": 0.0001,
      "step": 3569
    },
    {
      "epoch": 1.044776119402985,
      "grad_norm": 10.666226387023926,
      "learning_rate": 2.3880597014925373e-05,
      "loss": 0.081,
      "step": 3570
    },
    {
      "epoch": 1.0450687737781679,
      "grad_norm": 0.0009509926312603056,
      "learning_rate": 2.38732806555458e-05,
      "loss": 0.0,
      "step": 3571
    },
    {
      "epoch": 1.045361428153351,
      "grad_norm": 0.002926565706729889,
      "learning_rate": 2.386596429616623e-05,
      "loss": 0.0001,
      "step": 3572
    },
    {
      "epoch": 1.0456540825285339,
      "grad_norm": 0.0013789376243948936,
      "learning_rate": 2.3858647936786656e-05,
      "loss": 0.0,
      "step": 3573
    },
    {
      "epoch": 1.0459467369037168,
      "grad_norm": 0.0010121538070961833,
      "learning_rate": 2.385133157740708e-05,
      "loss": 0.0,
      "step": 3574
    },
    {
      "epoch": 1.0462393912788996,
      "grad_norm": 0.01967390812933445,
      "learning_rate": 2.384401521802751e-05,
      "loss": 0.0002,
      "step": 3575
    },
    {
      "epoch": 1.0465320456540825,
      "grad_norm": 0.001700015040114522,
      "learning_rate": 2.3836698858647937e-05,
      "loss": 0.0,
      "step": 3576
    },
    {
      "epoch": 1.0468247000292654,
      "grad_norm": 0.013245640322566032,
      "learning_rate": 2.3829382499268365e-05,
      "loss": 0.0001,
      "step": 3577
    },
    {
      "epoch": 1.0471173544044483,
      "grad_norm": 0.010698795318603516,
      "learning_rate": 2.3822066139888793e-05,
      "loss": 0.0002,
      "step": 3578
    },
    {
      "epoch": 1.0474100087796312,
      "grad_norm": 0.004378386773169041,
      "learning_rate": 2.3814749780509217e-05,
      "loss": 0.0001,
      "step": 3579
    },
    {
      "epoch": 1.047702663154814,
      "grad_norm": 0.0035175455268472433,
      "learning_rate": 2.3807433421129645e-05,
      "loss": 0.0001,
      "step": 3580
    },
    {
      "epoch": 1.047995317529997,
      "grad_norm": 0.0016487077809870243,
      "learning_rate": 2.3800117061750073e-05,
      "loss": 0.0,
      "step": 3581
    },
    {
      "epoch": 1.04828797190518,
      "grad_norm": 0.001347344950772822,
      "learning_rate": 2.37928007023705e-05,
      "loss": 0.0,
      "step": 3582
    },
    {
      "epoch": 1.048580626280363,
      "grad_norm": 0.007687774952501059,
      "learning_rate": 2.378548434299093e-05,
      "loss": 0.0001,
      "step": 3583
    },
    {
      "epoch": 1.0488732806555459,
      "grad_norm": 0.0017204730538651347,
      "learning_rate": 2.3778167983611357e-05,
      "loss": 0.0,
      "step": 3584
    },
    {
      "epoch": 1.0491659350307287,
      "grad_norm": 0.0008622069726698101,
      "learning_rate": 2.377085162423178e-05,
      "loss": 0.0,
      "step": 3585
    },
    {
      "epoch": 1.0494585894059116,
      "grad_norm": 0.001416791696101427,
      "learning_rate": 2.376353526485221e-05,
      "loss": 0.0,
      "step": 3586
    },
    {
      "epoch": 1.0497512437810945,
      "grad_norm": 0.0008721469203010201,
      "learning_rate": 2.3756218905472638e-05,
      "loss": 0.0,
      "step": 3587
    },
    {
      "epoch": 1.0500438981562774,
      "grad_norm": 0.2702424228191376,
      "learning_rate": 2.3748902546093066e-05,
      "loss": 0.0009,
      "step": 3588
    },
    {
      "epoch": 1.0503365525314603,
      "grad_norm": 0.014712399803102016,
      "learning_rate": 2.3741586186713493e-05,
      "loss": 0.0001,
      "step": 3589
    },
    {
      "epoch": 1.0506292069066432,
      "grad_norm": 0.000925264845136553,
      "learning_rate": 2.3734269827333918e-05,
      "loss": 0.0,
      "step": 3590
    },
    {
      "epoch": 1.050921861281826,
      "grad_norm": 0.0012234977912157774,
      "learning_rate": 2.3726953467954346e-05,
      "loss": 0.0,
      "step": 3591
    },
    {
      "epoch": 1.051214515657009,
      "grad_norm": 0.0011096167145296931,
      "learning_rate": 2.3719637108574774e-05,
      "loss": 0.0,
      "step": 3592
    },
    {
      "epoch": 1.051507170032192,
      "grad_norm": 0.002764485077932477,
      "learning_rate": 2.3712320749195202e-05,
      "loss": 0.0,
      "step": 3593
    },
    {
      "epoch": 1.051799824407375,
      "grad_norm": 0.0006492521497420967,
      "learning_rate": 2.370500438981563e-05,
      "loss": 0.0,
      "step": 3594
    },
    {
      "epoch": 1.0520924787825578,
      "grad_norm": 0.052430085837841034,
      "learning_rate": 2.3697688030436054e-05,
      "loss": 0.0002,
      "step": 3595
    },
    {
      "epoch": 1.0523851331577407,
      "grad_norm": 0.003093923907727003,
      "learning_rate": 2.3690371671056482e-05,
      "loss": 0.0,
      "step": 3596
    },
    {
      "epoch": 1.0526777875329236,
      "grad_norm": 0.0010897924657911062,
      "learning_rate": 2.368305531167691e-05,
      "loss": 0.0,
      "step": 3597
    },
    {
      "epoch": 1.0529704419081065,
      "grad_norm": 0.0005833354080095887,
      "learning_rate": 2.3675738952297338e-05,
      "loss": 0.0,
      "step": 3598
    },
    {
      "epoch": 1.0532630962832894,
      "grad_norm": 0.00032545908470638096,
      "learning_rate": 2.3668422592917766e-05,
      "loss": 0.0,
      "step": 3599
    },
    {
      "epoch": 1.0535557506584723,
      "grad_norm": 0.0004899411578662694,
      "learning_rate": 2.3661106233538194e-05,
      "loss": 0.0,
      "step": 3600
    },
    {
      "epoch": 1.0538484050336552,
      "grad_norm": 0.0058258599601686,
      "learning_rate": 2.365378987415862e-05,
      "loss": 0.0,
      "step": 3601
    },
    {
      "epoch": 1.054141059408838,
      "grad_norm": 0.0022187926806509495,
      "learning_rate": 2.3646473514779047e-05,
      "loss": 0.0,
      "step": 3602
    },
    {
      "epoch": 1.0544337137840212,
      "grad_norm": 0.0009017025586217642,
      "learning_rate": 2.3639157155399475e-05,
      "loss": 0.0,
      "step": 3603
    },
    {
      "epoch": 1.054726368159204,
      "grad_norm": 0.031748924404382706,
      "learning_rate": 2.3631840796019903e-05,
      "loss": 0.0002,
      "step": 3604
    },
    {
      "epoch": 1.055019022534387,
      "grad_norm": 0.002086823107674718,
      "learning_rate": 2.362452443664033e-05,
      "loss": 0.0,
      "step": 3605
    },
    {
      "epoch": 1.0553116769095698,
      "grad_norm": 0.001307449652813375,
      "learning_rate": 2.3617208077260755e-05,
      "loss": 0.0,
      "step": 3606
    },
    {
      "epoch": 1.0556043312847527,
      "grad_norm": 0.0016552209854125977,
      "learning_rate": 2.3609891717881183e-05,
      "loss": 0.0,
      "step": 3607
    },
    {
      "epoch": 1.0558969856599356,
      "grad_norm": 0.0003705853596329689,
      "learning_rate": 2.360257535850161e-05,
      "loss": 0.0,
      "step": 3608
    },
    {
      "epoch": 1.0561896400351185,
      "grad_norm": 0.0012484462931752205,
      "learning_rate": 2.359525899912204e-05,
      "loss": 0.0,
      "step": 3609
    },
    {
      "epoch": 1.0564822944103014,
      "grad_norm": 0.000867952941916883,
      "learning_rate": 2.3587942639742467e-05,
      "loss": 0.0,
      "step": 3610
    },
    {
      "epoch": 1.0567749487854843,
      "grad_norm": 0.0009844473097473383,
      "learning_rate": 2.358062628036289e-05,
      "loss": 0.0,
      "step": 3611
    },
    {
      "epoch": 1.0570676031606672,
      "grad_norm": 0.00044872480793856084,
      "learning_rate": 2.357330992098332e-05,
      "loss": 0.0,
      "step": 3612
    },
    {
      "epoch": 1.05736025753585,
      "grad_norm": 0.012851194478571415,
      "learning_rate": 2.3565993561603747e-05,
      "loss": 0.0001,
      "step": 3613
    },
    {
      "epoch": 1.0576529119110332,
      "grad_norm": 0.0010048080002889037,
      "learning_rate": 2.3558677202224175e-05,
      "loss": 0.0,
      "step": 3614
    },
    {
      "epoch": 1.057945566286216,
      "grad_norm": 0.0013905661180615425,
      "learning_rate": 2.3551360842844603e-05,
      "loss": 0.0,
      "step": 3615
    },
    {
      "epoch": 1.058238220661399,
      "grad_norm": 0.009411433711647987,
      "learning_rate": 2.3544044483465028e-05,
      "loss": 0.0001,
      "step": 3616
    },
    {
      "epoch": 1.0585308750365818,
      "grad_norm": 0.0010081817163154483,
      "learning_rate": 2.3536728124085456e-05,
      "loss": 0.0,
      "step": 3617
    },
    {
      "epoch": 1.0588235294117647,
      "grad_norm": 0.004782139789313078,
      "learning_rate": 2.3529411764705884e-05,
      "loss": 0.0001,
      "step": 3618
    },
    {
      "epoch": 1.0591161837869476,
      "grad_norm": 0.0008458578959107399,
      "learning_rate": 2.352209540532631e-05,
      "loss": 0.0,
      "step": 3619
    },
    {
      "epoch": 1.0594088381621305,
      "grad_norm": 0.0006553055136464536,
      "learning_rate": 2.351477904594674e-05,
      "loss": 0.0,
      "step": 3620
    },
    {
      "epoch": 1.0597014925373134,
      "grad_norm": 0.0006589387194253504,
      "learning_rate": 2.3507462686567168e-05,
      "loss": 0.0,
      "step": 3621
    },
    {
      "epoch": 1.0599941469124963,
      "grad_norm": 0.0006570523255504668,
      "learning_rate": 2.3500146327187592e-05,
      "loss": 0.0,
      "step": 3622
    },
    {
      "epoch": 1.0602868012876792,
      "grad_norm": 0.0013346909545361996,
      "learning_rate": 2.349282996780802e-05,
      "loss": 0.0,
      "step": 3623
    },
    {
      "epoch": 1.0605794556628623,
      "grad_norm": 0.0007223114371299744,
      "learning_rate": 2.3485513608428448e-05,
      "loss": 0.0,
      "step": 3624
    },
    {
      "epoch": 1.0608721100380452,
      "grad_norm": 0.00046515671419911087,
      "learning_rate": 2.3478197249048876e-05,
      "loss": 0.0,
      "step": 3625
    },
    {
      "epoch": 1.061164764413228,
      "grad_norm": 0.0006506069330498576,
      "learning_rate": 2.3470880889669304e-05,
      "loss": 0.0,
      "step": 3626
    },
    {
      "epoch": 1.061457418788411,
      "grad_norm": 0.0005674001877196133,
      "learning_rate": 2.346356453028973e-05,
      "loss": 0.0,
      "step": 3627
    },
    {
      "epoch": 1.0617500731635938,
      "grad_norm": 4.653077125549316,
      "learning_rate": 2.3456248170910156e-05,
      "loss": 0.0164,
      "step": 3628
    },
    {
      "epoch": 1.0620427275387767,
      "grad_norm": 0.000740343879442662,
      "learning_rate": 2.3448931811530584e-05,
      "loss": 0.0,
      "step": 3629
    },
    {
      "epoch": 1.0623353819139596,
      "grad_norm": 0.0011946418089792132,
      "learning_rate": 2.3441615452151012e-05,
      "loss": 0.0,
      "step": 3630
    },
    {
      "epoch": 1.0626280362891425,
      "grad_norm": 0.00047488027485087514,
      "learning_rate": 2.343429909277144e-05,
      "loss": 0.0,
      "step": 3631
    },
    {
      "epoch": 1.0629206906643254,
      "grad_norm": 0.0004698278789874166,
      "learning_rate": 2.3426982733391865e-05,
      "loss": 0.0,
      "step": 3632
    },
    {
      "epoch": 1.0632133450395083,
      "grad_norm": 0.003856950905174017,
      "learning_rate": 2.3419666374012293e-05,
      "loss": 0.0,
      "step": 3633
    },
    {
      "epoch": 1.0635059994146911,
      "grad_norm": 0.0016928977565839887,
      "learning_rate": 2.341235001463272e-05,
      "loss": 0.0,
      "step": 3634
    },
    {
      "epoch": 1.0637986537898743,
      "grad_norm": 0.006070285104215145,
      "learning_rate": 2.340503365525315e-05,
      "loss": 0.0001,
      "step": 3635
    },
    {
      "epoch": 1.0640913081650571,
      "grad_norm": 0.0767374038696289,
      "learning_rate": 2.3397717295873577e-05,
      "loss": 0.0002,
      "step": 3636
    },
    {
      "epoch": 1.06438396254024,
      "grad_norm": 0.01066882349550724,
      "learning_rate": 2.3390400936494005e-05,
      "loss": 0.0001,
      "step": 3637
    },
    {
      "epoch": 1.064676616915423,
      "grad_norm": 0.2522182762622833,
      "learning_rate": 2.338308457711443e-05,
      "loss": 0.0013,
      "step": 3638
    },
    {
      "epoch": 1.0649692712906058,
      "grad_norm": 0.4211108982563019,
      "learning_rate": 2.3375768217734857e-05,
      "loss": 0.0011,
      "step": 3639
    },
    {
      "epoch": 1.0652619256657887,
      "grad_norm": 0.06459060311317444,
      "learning_rate": 2.3368451858355285e-05,
      "loss": 0.0004,
      "step": 3640
    },
    {
      "epoch": 1.0655545800409716,
      "grad_norm": 4.673795223236084,
      "learning_rate": 2.3361135498975713e-05,
      "loss": 0.2605,
      "step": 3641
    },
    {
      "epoch": 1.0658472344161545,
      "grad_norm": 0.0006907483912073076,
      "learning_rate": 2.3353819139596138e-05,
      "loss": 0.0,
      "step": 3642
    },
    {
      "epoch": 1.0661398887913374,
      "grad_norm": 0.0042630466632544994,
      "learning_rate": 2.3346502780216565e-05,
      "loss": 0.0,
      "step": 3643
    },
    {
      "epoch": 1.0664325431665203,
      "grad_norm": 0.0007183166453614831,
      "learning_rate": 2.3339186420836993e-05,
      "loss": 0.0,
      "step": 3644
    },
    {
      "epoch": 1.0667251975417034,
      "grad_norm": 0.0004632271302398294,
      "learning_rate": 2.333187006145742e-05,
      "loss": 0.0,
      "step": 3645
    },
    {
      "epoch": 1.0670178519168863,
      "grad_norm": 0.0017585419118404388,
      "learning_rate": 2.3324553702077846e-05,
      "loss": 0.0,
      "step": 3646
    },
    {
      "epoch": 1.0673105062920691,
      "grad_norm": 0.0007347252685576677,
      "learning_rate": 2.3317237342698274e-05,
      "loss": 0.0,
      "step": 3647
    },
    {
      "epoch": 1.067603160667252,
      "grad_norm": 0.00169753294903785,
      "learning_rate": 2.3309920983318702e-05,
      "loss": 0.0,
      "step": 3648
    },
    {
      "epoch": 1.067895815042435,
      "grad_norm": 0.0028705827426165342,
      "learning_rate": 2.330260462393913e-05,
      "loss": 0.0001,
      "step": 3649
    },
    {
      "epoch": 1.0681884694176178,
      "grad_norm": 0.030678434297442436,
      "learning_rate": 2.3295288264559554e-05,
      "loss": 0.0002,
      "step": 3650
    },
    {
      "epoch": 1.0684811237928007,
      "grad_norm": 0.0030463761650025845,
      "learning_rate": 2.3287971905179982e-05,
      "loss": 0.0001,
      "step": 3651
    },
    {
      "epoch": 1.0687737781679836,
      "grad_norm": 0.0037456315476447344,
      "learning_rate": 2.328065554580041e-05,
      "loss": 0.0001,
      "step": 3652
    },
    {
      "epoch": 1.0690664325431665,
      "grad_norm": 0.0034187056589871645,
      "learning_rate": 2.3273339186420838e-05,
      "loss": 0.0001,
      "step": 3653
    },
    {
      "epoch": 1.0693590869183494,
      "grad_norm": 0.014863521791994572,
      "learning_rate": 2.3266022827041263e-05,
      "loss": 0.0002,
      "step": 3654
    },
    {
      "epoch": 1.0696517412935322,
      "grad_norm": 0.0028082518838346004,
      "learning_rate": 2.325870646766169e-05,
      "loss": 0.0001,
      "step": 3655
    },
    {
      "epoch": 1.0699443956687151,
      "grad_norm": 0.009496224112808704,
      "learning_rate": 2.325139010828212e-05,
      "loss": 0.0002,
      "step": 3656
    },
    {
      "epoch": 1.0702370500438982,
      "grad_norm": 0.0020644606556743383,
      "learning_rate": 2.3244073748902547e-05,
      "loss": 0.0,
      "step": 3657
    },
    {
      "epoch": 1.0705297044190811,
      "grad_norm": 0.009632423520088196,
      "learning_rate": 2.3236757389522975e-05,
      "loss": 0.0002,
      "step": 3658
    },
    {
      "epoch": 1.070822358794264,
      "grad_norm": 0.002899567363783717,
      "learning_rate": 2.32294410301434e-05,
      "loss": 0.0001,
      "step": 3659
    },
    {
      "epoch": 1.071115013169447,
      "grad_norm": 0.006019872613251209,
      "learning_rate": 2.3222124670763827e-05,
      "loss": 0.0001,
      "step": 3660
    },
    {
      "epoch": 1.0714076675446298,
      "grad_norm": 0.02681499533355236,
      "learning_rate": 2.3214808311384255e-05,
      "loss": 0.0003,
      "step": 3661
    },
    {
      "epoch": 1.0717003219198127,
      "grad_norm": 0.008631656877696514,
      "learning_rate": 2.3207491952004683e-05,
      "loss": 0.0002,
      "step": 3662
    },
    {
      "epoch": 1.0719929762949956,
      "grad_norm": 0.00794452615082264,
      "learning_rate": 2.320017559262511e-05,
      "loss": 0.0002,
      "step": 3663
    },
    {
      "epoch": 1.0722856306701785,
      "grad_norm": 0.24969637393951416,
      "learning_rate": 2.3192859233245536e-05,
      "loss": 0.0008,
      "step": 3664
    },
    {
      "epoch": 1.0725782850453613,
      "grad_norm": 0.003523320658132434,
      "learning_rate": 2.3185542873865963e-05,
      "loss": 0.0001,
      "step": 3665
    },
    {
      "epoch": 1.0728709394205442,
      "grad_norm": 0.2369241863489151,
      "learning_rate": 2.317822651448639e-05,
      "loss": 0.0011,
      "step": 3666
    },
    {
      "epoch": 1.0731635937957273,
      "grad_norm": 0.007222886197268963,
      "learning_rate": 2.317091015510682e-05,
      "loss": 0.0001,
      "step": 3667
    },
    {
      "epoch": 1.0734562481709102,
      "grad_norm": 0.007124431896954775,
      "learning_rate": 2.3163593795727247e-05,
      "loss": 0.0001,
      "step": 3668
    },
    {
      "epoch": 1.0737489025460931,
      "grad_norm": 0.005987787153571844,
      "learning_rate": 2.3156277436347675e-05,
      "loss": 0.0001,
      "step": 3669
    },
    {
      "epoch": 1.074041556921276,
      "grad_norm": 0.00803651474416256,
      "learning_rate": 2.31489610769681e-05,
      "loss": 0.0001,
      "step": 3670
    },
    {
      "epoch": 1.074334211296459,
      "grad_norm": 0.0025474862195551395,
      "learning_rate": 2.3141644717588528e-05,
      "loss": 0.0001,
      "step": 3671
    },
    {
      "epoch": 1.0746268656716418,
      "grad_norm": 0.0026336340233683586,
      "learning_rate": 2.3134328358208956e-05,
      "loss": 0.0001,
      "step": 3672
    },
    {
      "epoch": 1.0749195200468247,
      "grad_norm": 0.0007940291543491185,
      "learning_rate": 2.3127011998829384e-05,
      "loss": 0.0,
      "step": 3673
    },
    {
      "epoch": 1.0752121744220076,
      "grad_norm": 0.001279535237699747,
      "learning_rate": 2.311969563944981e-05,
      "loss": 0.0,
      "step": 3674
    },
    {
      "epoch": 1.0755048287971904,
      "grad_norm": 0.0010387222282588482,
      "learning_rate": 2.3112379280070236e-05,
      "loss": 0.0,
      "step": 3675
    },
    {
      "epoch": 1.0757974831723733,
      "grad_norm": 0.004831269383430481,
      "learning_rate": 2.3105062920690664e-05,
      "loss": 0.0001,
      "step": 3676
    },
    {
      "epoch": 1.0760901375475562,
      "grad_norm": 0.0038754655979573727,
      "learning_rate": 2.3097746561311092e-05,
      "loss": 0.0001,
      "step": 3677
    },
    {
      "epoch": 1.0763827919227393,
      "grad_norm": 0.0028973727021366358,
      "learning_rate": 2.309043020193152e-05,
      "loss": 0.0001,
      "step": 3678
    },
    {
      "epoch": 1.0766754462979222,
      "grad_norm": 0.0024733960162848234,
      "learning_rate": 2.3083113842551948e-05,
      "loss": 0.0001,
      "step": 3679
    },
    {
      "epoch": 1.076968100673105,
      "grad_norm": 0.002890933770686388,
      "learning_rate": 2.3075797483172373e-05,
      "loss": 0.0001,
      "step": 3680
    },
    {
      "epoch": 1.077260755048288,
      "grad_norm": 0.002433110261335969,
      "learning_rate": 2.30684811237928e-05,
      "loss": 0.0,
      "step": 3681
    },
    {
      "epoch": 1.0775534094234709,
      "grad_norm": 0.15135546028614044,
      "learning_rate": 2.306116476441323e-05,
      "loss": 0.0003,
      "step": 3682
    },
    {
      "epoch": 1.0778460637986538,
      "grad_norm": 0.0029673471581190825,
      "learning_rate": 2.3053848405033656e-05,
      "loss": 0.0001,
      "step": 3683
    },
    {
      "epoch": 1.0781387181738367,
      "grad_norm": 0.00278499280102551,
      "learning_rate": 2.3046532045654084e-05,
      "loss": 0.0001,
      "step": 3684
    },
    {
      "epoch": 1.0784313725490196,
      "grad_norm": 0.0024187383241951466,
      "learning_rate": 2.303921568627451e-05,
      "loss": 0.0,
      "step": 3685
    },
    {
      "epoch": 1.0787240269242024,
      "grad_norm": 0.0009933672845363617,
      "learning_rate": 2.3031899326894937e-05,
      "loss": 0.0,
      "step": 3686
    },
    {
      "epoch": 1.0790166812993853,
      "grad_norm": 0.0012278319336473942,
      "learning_rate": 2.3024582967515365e-05,
      "loss": 0.0,
      "step": 3687
    },
    {
      "epoch": 1.0793093356745684,
      "grad_norm": 0.01352784875780344,
      "learning_rate": 2.3017266608135793e-05,
      "loss": 0.0001,
      "step": 3688
    },
    {
      "epoch": 1.0796019900497513,
      "grad_norm": 0.003959336783736944,
      "learning_rate": 2.300995024875622e-05,
      "loss": 0.0001,
      "step": 3689
    },
    {
      "epoch": 1.0798946444249342,
      "grad_norm": 0.0008704708307050169,
      "learning_rate": 2.300263388937665e-05,
      "loss": 0.0,
      "step": 3690
    },
    {
      "epoch": 1.080187298800117,
      "grad_norm": 0.0008812810410745442,
      "learning_rate": 2.2995317529997073e-05,
      "loss": 0.0,
      "step": 3691
    },
    {
      "epoch": 1.0804799531753,
      "grad_norm": 0.007243090774863958,
      "learning_rate": 2.29880011706175e-05,
      "loss": 0.0001,
      "step": 3692
    },
    {
      "epoch": 1.0807726075504829,
      "grad_norm": 0.0025750931818038225,
      "learning_rate": 2.298068481123793e-05,
      "loss": 0.0001,
      "step": 3693
    },
    {
      "epoch": 1.0810652619256658,
      "grad_norm": 0.0019465017830953002,
      "learning_rate": 2.2973368451858357e-05,
      "loss": 0.0,
      "step": 3694
    },
    {
      "epoch": 1.0813579163008487,
      "grad_norm": 0.0012004871387034655,
      "learning_rate": 2.2966052092478785e-05,
      "loss": 0.0,
      "step": 3695
    },
    {
      "epoch": 1.0816505706760315,
      "grad_norm": 0.0010097112972289324,
      "learning_rate": 2.295873573309921e-05,
      "loss": 0.0,
      "step": 3696
    },
    {
      "epoch": 1.0819432250512144,
      "grad_norm": 0.0009873710805550218,
      "learning_rate": 2.2951419373719638e-05,
      "loss": 0.0,
      "step": 3697
    },
    {
      "epoch": 1.0822358794263973,
      "grad_norm": 0.0013015306321904063,
      "learning_rate": 2.2944103014340065e-05,
      "loss": 0.0,
      "step": 3698
    },
    {
      "epoch": 1.0825285338015804,
      "grad_norm": 0.0013580222148448229,
      "learning_rate": 2.2936786654960493e-05,
      "loss": 0.0,
      "step": 3699
    },
    {
      "epoch": 1.0828211881767633,
      "grad_norm": 0.00278102932497859,
      "learning_rate": 2.292947029558092e-05,
      "loss": 0.0001,
      "step": 3700
    },
    {
      "epoch": 1.0831138425519462,
      "grad_norm": 0.008593583479523659,
      "learning_rate": 2.2922153936201346e-05,
      "loss": 0.0001,
      "step": 3701
    },
    {
      "epoch": 1.083406496927129,
      "grad_norm": 0.0005224637570790946,
      "learning_rate": 2.2914837576821774e-05,
      "loss": 0.0,
      "step": 3702
    },
    {
      "epoch": 1.083699151302312,
      "grad_norm": 0.004331057891249657,
      "learning_rate": 2.2907521217442202e-05,
      "loss": 0.0001,
      "step": 3703
    },
    {
      "epoch": 1.0839918056774949,
      "grad_norm": 0.0006074766279198229,
      "learning_rate": 2.290020485806263e-05,
      "loss": 0.0,
      "step": 3704
    },
    {
      "epoch": 1.0842844600526778,
      "grad_norm": 0.0010289569618180394,
      "learning_rate": 2.2892888498683058e-05,
      "loss": 0.0,
      "step": 3705
    },
    {
      "epoch": 1.0845771144278606,
      "grad_norm": 0.0007409527315758169,
      "learning_rate": 2.2885572139303486e-05,
      "loss": 0.0,
      "step": 3706
    },
    {
      "epoch": 1.0848697688030435,
      "grad_norm": 0.0006891212542541325,
      "learning_rate": 2.287825577992391e-05,
      "loss": 0.0,
      "step": 3707
    },
    {
      "epoch": 1.0851624231782264,
      "grad_norm": 0.001396518899127841,
      "learning_rate": 2.2870939420544338e-05,
      "loss": 0.0,
      "step": 3708
    },
    {
      "epoch": 1.0854550775534095,
      "grad_norm": 0.001946881995536387,
      "learning_rate": 2.2863623061164766e-05,
      "loss": 0.0,
      "step": 3709
    },
    {
      "epoch": 1.0857477319285924,
      "grad_norm": 0.000842281908262521,
      "learning_rate": 2.2856306701785194e-05,
      "loss": 0.0,
      "step": 3710
    },
    {
      "epoch": 1.0860403863037753,
      "grad_norm": 0.0006059493171051145,
      "learning_rate": 2.2848990342405622e-05,
      "loss": 0.0,
      "step": 3711
    },
    {
      "epoch": 1.0863330406789582,
      "grad_norm": 0.003021880518645048,
      "learning_rate": 2.2841673983026047e-05,
      "loss": 0.0,
      "step": 3712
    },
    {
      "epoch": 1.086625695054141,
      "grad_norm": 0.0009339431999251246,
      "learning_rate": 2.2834357623646475e-05,
      "loss": 0.0,
      "step": 3713
    },
    {
      "epoch": 1.086918349429324,
      "grad_norm": 0.0011237096041440964,
      "learning_rate": 2.2827041264266903e-05,
      "loss": 0.0,
      "step": 3714
    },
    {
      "epoch": 1.0872110038045069,
      "grad_norm": 0.002793660154566169,
      "learning_rate": 2.281972490488733e-05,
      "loss": 0.0,
      "step": 3715
    },
    {
      "epoch": 1.0875036581796897,
      "grad_norm": 0.0008379679638892412,
      "learning_rate": 2.281240854550776e-05,
      "loss": 0.0,
      "step": 3716
    },
    {
      "epoch": 1.0877963125548726,
      "grad_norm": 0.0008086669258773327,
      "learning_rate": 2.2805092186128183e-05,
      "loss": 0.0,
      "step": 3717
    },
    {
      "epoch": 1.0880889669300555,
      "grad_norm": 0.004301237873733044,
      "learning_rate": 2.279777582674861e-05,
      "loss": 0.0,
      "step": 3718
    },
    {
      "epoch": 1.0883816213052384,
      "grad_norm": 0.0007910404237918556,
      "learning_rate": 2.279045946736904e-05,
      "loss": 0.0,
      "step": 3719
    },
    {
      "epoch": 1.0886742756804215,
      "grad_norm": 0.0005540227284654975,
      "learning_rate": 2.2783143107989467e-05,
      "loss": 0.0,
      "step": 3720
    },
    {
      "epoch": 1.0889669300556044,
      "grad_norm": 0.002769920974969864,
      "learning_rate": 2.2775826748609895e-05,
      "loss": 0.0,
      "step": 3721
    },
    {
      "epoch": 1.0892595844307873,
      "grad_norm": 0.0006901599117554724,
      "learning_rate": 2.2768510389230323e-05,
      "loss": 0.0,
      "step": 3722
    },
    {
      "epoch": 1.0895522388059702,
      "grad_norm": 0.016822081059217453,
      "learning_rate": 2.2761194029850747e-05,
      "loss": 0.0001,
      "step": 3723
    },
    {
      "epoch": 1.089844893181153,
      "grad_norm": 0.0010369161609560251,
      "learning_rate": 2.2753877670471175e-05,
      "loss": 0.0,
      "step": 3724
    },
    {
      "epoch": 1.090137547556336,
      "grad_norm": 0.0011231061071157455,
      "learning_rate": 2.2746561311091603e-05,
      "loss": 0.0,
      "step": 3725
    },
    {
      "epoch": 1.0904302019315188,
      "grad_norm": 0.001505909371189773,
      "learning_rate": 2.273924495171203e-05,
      "loss": 0.0,
      "step": 3726
    },
    {
      "epoch": 1.0907228563067017,
      "grad_norm": 0.0013153402833268046,
      "learning_rate": 2.273192859233246e-05,
      "loss": 0.0,
      "step": 3727
    },
    {
      "epoch": 1.0910155106818846,
      "grad_norm": 0.0010314127430319786,
      "learning_rate": 2.2724612232952884e-05,
      "loss": 0.0,
      "step": 3728
    },
    {
      "epoch": 1.0913081650570675,
      "grad_norm": 0.000518880202434957,
      "learning_rate": 2.271729587357331e-05,
      "loss": 0.0,
      "step": 3729
    },
    {
      "epoch": 1.0916008194322506,
      "grad_norm": 0.0004906303365714848,
      "learning_rate": 2.270997951419374e-05,
      "loss": 0.0,
      "step": 3730
    },
    {
      "epoch": 1.0918934738074335,
      "grad_norm": 0.00047414834261871874,
      "learning_rate": 2.2702663154814168e-05,
      "loss": 0.0,
      "step": 3731
    },
    {
      "epoch": 1.0921861281826164,
      "grad_norm": 0.0015571311814710498,
      "learning_rate": 2.2695346795434595e-05,
      "loss": 0.0,
      "step": 3732
    },
    {
      "epoch": 1.0924787825577993,
      "grad_norm": 0.015585982240736485,
      "learning_rate": 2.268803043605502e-05,
      "loss": 0.0001,
      "step": 3733
    },
    {
      "epoch": 1.0927714369329822,
      "grad_norm": 0.0012408023467287421,
      "learning_rate": 2.2680714076675448e-05,
      "loss": 0.0,
      "step": 3734
    },
    {
      "epoch": 1.093064091308165,
      "grad_norm": 0.0005492728087119758,
      "learning_rate": 2.2673397717295876e-05,
      "loss": 0.0,
      "step": 3735
    },
    {
      "epoch": 1.093356745683348,
      "grad_norm": 0.000611587893217802,
      "learning_rate": 2.2666081357916304e-05,
      "loss": 0.0,
      "step": 3736
    },
    {
      "epoch": 1.0936494000585308,
      "grad_norm": 0.0016189048765227199,
      "learning_rate": 2.265876499853673e-05,
      "loss": 0.0,
      "step": 3737
    },
    {
      "epoch": 1.0939420544337137,
      "grad_norm": 0.0011567751644179225,
      "learning_rate": 2.2651448639157156e-05,
      "loss": 0.0,
      "step": 3738
    },
    {
      "epoch": 1.0942347088088966,
      "grad_norm": 0.009464586153626442,
      "learning_rate": 2.2644132279777584e-05,
      "loss": 0.0001,
      "step": 3739
    },
    {
      "epoch": 1.0945273631840795,
      "grad_norm": 0.0015173718566074967,
      "learning_rate": 2.2636815920398012e-05,
      "loss": 0.0,
      "step": 3740
    },
    {
      "epoch": 1.0948200175592626,
      "grad_norm": 0.0005763802328146994,
      "learning_rate": 2.2629499561018437e-05,
      "loss": 0.0,
      "step": 3741
    },
    {
      "epoch": 1.0951126719344455,
      "grad_norm": 0.0008895478094927967,
      "learning_rate": 2.2622183201638865e-05,
      "loss": 0.0,
      "step": 3742
    },
    {
      "epoch": 1.0954053263096284,
      "grad_norm": 0.0006199186318553984,
      "learning_rate": 2.2614866842259293e-05,
      "loss": 0.0,
      "step": 3743
    },
    {
      "epoch": 1.0956979806848113,
      "grad_norm": 0.0002753791050054133,
      "learning_rate": 2.260755048287972e-05,
      "loss": 0.0,
      "step": 3744
    },
    {
      "epoch": 1.0959906350599942,
      "grad_norm": 0.000554372847545892,
      "learning_rate": 2.2600234123500145e-05,
      "loss": 0.0,
      "step": 3745
    },
    {
      "epoch": 1.096283289435177,
      "grad_norm": 0.0011892981128767133,
      "learning_rate": 2.2592917764120573e-05,
      "loss": 0.0,
      "step": 3746
    },
    {
      "epoch": 1.09657594381036,
      "grad_norm": 0.0006116424920037389,
      "learning_rate": 2.2585601404741e-05,
      "loss": 0.0,
      "step": 3747
    },
    {
      "epoch": 1.0968685981855428,
      "grad_norm": 0.004664978478103876,
      "learning_rate": 2.257828504536143e-05,
      "loss": 0.0,
      "step": 3748
    },
    {
      "epoch": 1.0971612525607257,
      "grad_norm": 0.0008806657278910279,
      "learning_rate": 2.2570968685981854e-05,
      "loss": 0.0,
      "step": 3749
    },
    {
      "epoch": 1.0974539069359086,
      "grad_norm": 0.00028525033849291503,
      "learning_rate": 2.256365232660228e-05,
      "loss": 0.0,
      "step": 3750
    },
    {
      "epoch": 1.0977465613110917,
      "grad_norm": 0.0006327142473310232,
      "learning_rate": 2.255633596722271e-05,
      "loss": 0.0,
      "step": 3751
    },
    {
      "epoch": 1.0980392156862746,
      "grad_norm": 0.00039784680120646954,
      "learning_rate": 2.2549019607843138e-05,
      "loss": 0.0,
      "step": 3752
    },
    {
      "epoch": 1.0983318700614575,
      "grad_norm": 0.0009211124270223081,
      "learning_rate": 2.2541703248463565e-05,
      "loss": 0.0,
      "step": 3753
    },
    {
      "epoch": 1.0986245244366404,
      "grad_norm": 0.00081194459926337,
      "learning_rate": 2.253438688908399e-05,
      "loss": 0.0,
      "step": 3754
    },
    {
      "epoch": 1.0989171788118233,
      "grad_norm": 0.0010225963778793812,
      "learning_rate": 2.2527070529704418e-05,
      "loss": 0.0,
      "step": 3755
    },
    {
      "epoch": 1.0992098331870062,
      "grad_norm": 0.0015989018138498068,
      "learning_rate": 2.2519754170324846e-05,
      "loss": 0.0,
      "step": 3756
    },
    {
      "epoch": 1.099502487562189,
      "grad_norm": 0.13136620819568634,
      "learning_rate": 2.2512437810945274e-05,
      "loss": 0.0004,
      "step": 3757
    },
    {
      "epoch": 1.099795141937372,
      "grad_norm": 0.009679006412625313,
      "learning_rate": 2.2505121451565702e-05,
      "loss": 0.0001,
      "step": 3758
    },
    {
      "epoch": 1.1000877963125548,
      "grad_norm": 0.00034133202279917896,
      "learning_rate": 2.249780509218613e-05,
      "loss": 0.0,
      "step": 3759
    },
    {
      "epoch": 1.1003804506877377,
      "grad_norm": 0.0007985789561644197,
      "learning_rate": 2.2490488732806554e-05,
      "loss": 0.0,
      "step": 3760
    },
    {
      "epoch": 1.1006731050629206,
      "grad_norm": 0.0003673519822768867,
      "learning_rate": 2.2483172373426982e-05,
      "loss": 0.0,
      "step": 3761
    },
    {
      "epoch": 1.1009657594381035,
      "grad_norm": 0.005583070684224367,
      "learning_rate": 2.247585601404741e-05,
      "loss": 0.0,
      "step": 3762
    },
    {
      "epoch": 1.1012584138132866,
      "grad_norm": 0.000653867784421891,
      "learning_rate": 2.2468539654667838e-05,
      "loss": 0.0,
      "step": 3763
    },
    {
      "epoch": 1.1015510681884695,
      "grad_norm": 0.000688259897287935,
      "learning_rate": 2.2461223295288266e-05,
      "loss": 0.0,
      "step": 3764
    },
    {
      "epoch": 1.1018437225636524,
      "grad_norm": 0.0013501718640327454,
      "learning_rate": 2.245390693590869e-05,
      "loss": 0.0,
      "step": 3765
    },
    {
      "epoch": 1.1021363769388353,
      "grad_norm": 0.0008501984993927181,
      "learning_rate": 2.244659057652912e-05,
      "loss": 0.0,
      "step": 3766
    },
    {
      "epoch": 1.1024290313140181,
      "grad_norm": 0.0009289407753385603,
      "learning_rate": 2.2439274217149547e-05,
      "loss": 0.0,
      "step": 3767
    },
    {
      "epoch": 1.102721685689201,
      "grad_norm": 0.00038086893619038165,
      "learning_rate": 2.2431957857769975e-05,
      "loss": 0.0,
      "step": 3768
    },
    {
      "epoch": 1.103014340064384,
      "grad_norm": 0.0015734551707282662,
      "learning_rate": 2.2424641498390403e-05,
      "loss": 0.0,
      "step": 3769
    },
    {
      "epoch": 1.1033069944395668,
      "grad_norm": 0.0005957810208201408,
      "learning_rate": 2.2417325139010827e-05,
      "loss": 0.0,
      "step": 3770
    },
    {
      "epoch": 1.1035996488147497,
      "grad_norm": 0.00042635598219931126,
      "learning_rate": 2.2410008779631255e-05,
      "loss": 0.0,
      "step": 3771
    },
    {
      "epoch": 1.1038923031899326,
      "grad_norm": 0.00046527470112778246,
      "learning_rate": 2.2402692420251683e-05,
      "loss": 0.0,
      "step": 3772
    },
    {
      "epoch": 1.1041849575651157,
      "grad_norm": 0.0016610355814918876,
      "learning_rate": 2.239537606087211e-05,
      "loss": 0.0,
      "step": 3773
    },
    {
      "epoch": 1.1044776119402986,
      "grad_norm": 0.0005930790212005377,
      "learning_rate": 2.238805970149254e-05,
      "loss": 0.0,
      "step": 3774
    },
    {
      "epoch": 1.1047702663154815,
      "grad_norm": 0.0014426594134420156,
      "learning_rate": 2.2380743342112967e-05,
      "loss": 0.0,
      "step": 3775
    },
    {
      "epoch": 1.1050629206906644,
      "grad_norm": 0.0018905469914898276,
      "learning_rate": 2.237342698273339e-05,
      "loss": 0.0,
      "step": 3776
    },
    {
      "epoch": 1.1053555750658473,
      "grad_norm": 0.005967998877167702,
      "learning_rate": 2.236611062335382e-05,
      "loss": 0.0001,
      "step": 3777
    },
    {
      "epoch": 1.1056482294410301,
      "grad_norm": 0.0006593017023988068,
      "learning_rate": 2.2358794263974247e-05,
      "loss": 0.0,
      "step": 3778
    },
    {
      "epoch": 1.105940883816213,
      "grad_norm": 0.00027490098727867007,
      "learning_rate": 2.2351477904594675e-05,
      "loss": 0.0,
      "step": 3779
    },
    {
      "epoch": 1.106233538191396,
      "grad_norm": 0.002138617681339383,
      "learning_rate": 2.2344161545215103e-05,
      "loss": 0.0,
      "step": 3780
    },
    {
      "epoch": 1.1065261925665788,
      "grad_norm": 0.0010131902527064085,
      "learning_rate": 2.2336845185835528e-05,
      "loss": 0.0,
      "step": 3781
    },
    {
      "epoch": 1.1068188469417617,
      "grad_norm": 0.006386586930602789,
      "learning_rate": 2.2329528826455956e-05,
      "loss": 0.0,
      "step": 3782
    },
    {
      "epoch": 1.1071115013169446,
      "grad_norm": 0.0010048957774415612,
      "learning_rate": 2.2322212467076384e-05,
      "loss": 0.0,
      "step": 3783
    },
    {
      "epoch": 1.1074041556921277,
      "grad_norm": 0.0010382654145359993,
      "learning_rate": 2.231489610769681e-05,
      "loss": 0.0,
      "step": 3784
    },
    {
      "epoch": 1.1076968100673106,
      "grad_norm": 0.001115762977860868,
      "learning_rate": 2.230757974831724e-05,
      "loss": 0.0,
      "step": 3785
    },
    {
      "epoch": 1.1079894644424935,
      "grad_norm": 0.00950796902179718,
      "learning_rate": 2.2300263388937664e-05,
      "loss": 0.0001,
      "step": 3786
    },
    {
      "epoch": 1.1082821188176764,
      "grad_norm": 0.0006844153394922614,
      "learning_rate": 2.2292947029558092e-05,
      "loss": 0.0,
      "step": 3787
    },
    {
      "epoch": 1.1085747731928592,
      "grad_norm": 0.001086718519218266,
      "learning_rate": 2.228563067017852e-05,
      "loss": 0.0,
      "step": 3788
    },
    {
      "epoch": 1.1088674275680421,
      "grad_norm": 0.0005012504407204688,
      "learning_rate": 2.2278314310798948e-05,
      "loss": 0.0,
      "step": 3789
    },
    {
      "epoch": 1.109160081943225,
      "grad_norm": 0.0007421081536449492,
      "learning_rate": 2.2270997951419376e-05,
      "loss": 0.0,
      "step": 3790
    },
    {
      "epoch": 1.109452736318408,
      "grad_norm": 0.0006756620132364333,
      "learning_rate": 2.2263681592039804e-05,
      "loss": 0.0,
      "step": 3791
    },
    {
      "epoch": 1.1097453906935908,
      "grad_norm": 0.0004992782487533987,
      "learning_rate": 2.225636523266023e-05,
      "loss": 0.0,
      "step": 3792
    },
    {
      "epoch": 1.1100380450687737,
      "grad_norm": 0.0009807657916098833,
      "learning_rate": 2.2249048873280656e-05,
      "loss": 0.0,
      "step": 3793
    },
    {
      "epoch": 1.1103306994439568,
      "grad_norm": 0.002366979606449604,
      "learning_rate": 2.2241732513901084e-05,
      "loss": 0.0,
      "step": 3794
    },
    {
      "epoch": 1.1106233538191397,
      "grad_norm": 0.00036062311846762896,
      "learning_rate": 2.2234416154521512e-05,
      "loss": 0.0,
      "step": 3795
    },
    {
      "epoch": 1.1109160081943226,
      "grad_norm": 0.00047910830471664667,
      "learning_rate": 2.222709979514194e-05,
      "loss": 0.0,
      "step": 3796
    },
    {
      "epoch": 1.1112086625695055,
      "grad_norm": 0.0006496130372397602,
      "learning_rate": 2.2219783435762365e-05,
      "loss": 0.0,
      "step": 3797
    },
    {
      "epoch": 1.1115013169446883,
      "grad_norm": 0.0006131701520644128,
      "learning_rate": 2.2212467076382793e-05,
      "loss": 0.0,
      "step": 3798
    },
    {
      "epoch": 1.1117939713198712,
      "grad_norm": 0.0005057764356024563,
      "learning_rate": 2.220515071700322e-05,
      "loss": 0.0,
      "step": 3799
    },
    {
      "epoch": 1.1120866256950541,
      "grad_norm": 0.0007563771214336157,
      "learning_rate": 2.219783435762365e-05,
      "loss": 0.0,
      "step": 3800
    },
    {
      "epoch": 1.112379280070237,
      "grad_norm": 3.374889612197876,
      "learning_rate": 2.2190517998244077e-05,
      "loss": 0.1949,
      "step": 3801
    },
    {
      "epoch": 1.11267193444542,
      "grad_norm": 0.0005034583737142384,
      "learning_rate": 2.21832016388645e-05,
      "loss": 0.0,
      "step": 3802
    },
    {
      "epoch": 1.1129645888206028,
      "grad_norm": 0.0008981447317637503,
      "learning_rate": 2.217588527948493e-05,
      "loss": 0.0,
      "step": 3803
    },
    {
      "epoch": 1.1132572431957857,
      "grad_norm": 0.00942637026309967,
      "learning_rate": 2.2168568920105357e-05,
      "loss": 0.0001,
      "step": 3804
    },
    {
      "epoch": 1.1135498975709688,
      "grad_norm": 0.07058790326118469,
      "learning_rate": 2.2161252560725785e-05,
      "loss": 0.0002,
      "step": 3805
    },
    {
      "epoch": 1.1138425519461517,
      "grad_norm": 0.0011174330720677972,
      "learning_rate": 2.2153936201346213e-05,
      "loss": 0.0,
      "step": 3806
    },
    {
      "epoch": 1.1141352063213346,
      "grad_norm": 0.030590757727622986,
      "learning_rate": 2.2146619841966638e-05,
      "loss": 0.0004,
      "step": 3807
    },
    {
      "epoch": 1.1144278606965174,
      "grad_norm": 0.014479140751063824,
      "learning_rate": 2.2139303482587065e-05,
      "loss": 0.0002,
      "step": 3808
    },
    {
      "epoch": 1.1147205150717003,
      "grad_norm": 1.097375750541687,
      "learning_rate": 2.2131987123207493e-05,
      "loss": 0.0029,
      "step": 3809
    },
    {
      "epoch": 1.1150131694468832,
      "grad_norm": 0.20258143544197083,
      "learning_rate": 2.212467076382792e-05,
      "loss": 0.0011,
      "step": 3810
    },
    {
      "epoch": 1.1153058238220661,
      "grad_norm": 0.01319414097815752,
      "learning_rate": 2.211735440444835e-05,
      "loss": 0.0002,
      "step": 3811
    },
    {
      "epoch": 1.115598478197249,
      "grad_norm": 0.001642485847696662,
      "learning_rate": 2.2110038045068777e-05,
      "loss": 0.0,
      "step": 3812
    },
    {
      "epoch": 1.1158911325724319,
      "grad_norm": 0.004467404447495937,
      "learning_rate": 2.2102721685689202e-05,
      "loss": 0.0001,
      "step": 3813
    },
    {
      "epoch": 1.1161837869476148,
      "grad_norm": 6.050930500030518,
      "learning_rate": 2.209540532630963e-05,
      "loss": 0.1099,
      "step": 3814
    },
    {
      "epoch": 1.1164764413227979,
      "grad_norm": 0.029940001666545868,
      "learning_rate": 2.2088088966930058e-05,
      "loss": 0.0004,
      "step": 3815
    },
    {
      "epoch": 1.1167690956979808,
      "grad_norm": 0.003221668303012848,
      "learning_rate": 2.2080772607550486e-05,
      "loss": 0.0001,
      "step": 3816
    },
    {
      "epoch": 1.1170617500731637,
      "grad_norm": 0.015286382287740707,
      "learning_rate": 2.2073456248170914e-05,
      "loss": 0.0002,
      "step": 3817
    },
    {
      "epoch": 1.1173544044483465,
      "grad_norm": 0.0132718151435256,
      "learning_rate": 2.2066139888791338e-05,
      "loss": 0.0002,
      "step": 3818
    },
    {
      "epoch": 1.1176470588235294,
      "grad_norm": 0.056354206055402756,
      "learning_rate": 2.2058823529411766e-05,
      "loss": 0.0004,
      "step": 3819
    },
    {
      "epoch": 1.1179397131987123,
      "grad_norm": 0.0911208763718605,
      "learning_rate": 2.2051507170032194e-05,
      "loss": 0.0011,
      "step": 3820
    },
    {
      "epoch": 1.1182323675738952,
      "grad_norm": 0.07263613492250443,
      "learning_rate": 2.2044190810652622e-05,
      "loss": 0.0006,
      "step": 3821
    },
    {
      "epoch": 1.118525021949078,
      "grad_norm": 2.2835283279418945,
      "learning_rate": 2.203687445127305e-05,
      "loss": 0.0163,
      "step": 3822
    },
    {
      "epoch": 1.118817676324261,
      "grad_norm": 0.05524302273988724,
      "learning_rate": 2.2029558091893475e-05,
      "loss": 0.0007,
      "step": 3823
    },
    {
      "epoch": 1.1191103306994439,
      "grad_norm": 0.0030861482955515385,
      "learning_rate": 2.2022241732513903e-05,
      "loss": 0.0,
      "step": 3824
    },
    {
      "epoch": 1.1194029850746268,
      "grad_norm": 0.0007699658162891865,
      "learning_rate": 2.201492537313433e-05,
      "loss": 0.0,
      "step": 3825
    },
    {
      "epoch": 1.1196956394498099,
      "grad_norm": 0.004165220074355602,
      "learning_rate": 2.200760901375476e-05,
      "loss": 0.0001,
      "step": 3826
    },
    {
      "epoch": 1.1199882938249928,
      "grad_norm": 0.0010617617517709732,
      "learning_rate": 2.2000292654375186e-05,
      "loss": 0.0,
      "step": 3827
    },
    {
      "epoch": 1.1202809482001757,
      "grad_norm": 0.000694679154548794,
      "learning_rate": 2.199297629499561e-05,
      "loss": 0.0,
      "step": 3828
    },
    {
      "epoch": 1.1205736025753585,
      "grad_norm": 0.0009077360155060887,
      "learning_rate": 2.198565993561604e-05,
      "loss": 0.0,
      "step": 3829
    },
    {
      "epoch": 1.1208662569505414,
      "grad_norm": 0.0010671500349417329,
      "learning_rate": 2.1978343576236467e-05,
      "loss": 0.0,
      "step": 3830
    },
    {
      "epoch": 1.1211589113257243,
      "grad_norm": 0.0006664483807981014,
      "learning_rate": 2.1971027216856895e-05,
      "loss": 0.0,
      "step": 3831
    },
    {
      "epoch": 1.1214515657009072,
      "grad_norm": 0.00031987507827579975,
      "learning_rate": 2.196371085747732e-05,
      "loss": 0.0,
      "step": 3832
    },
    {
      "epoch": 1.12174422007609,
      "grad_norm": 0.001221408136188984,
      "learning_rate": 2.1956394498097747e-05,
      "loss": 0.0,
      "step": 3833
    },
    {
      "epoch": 1.122036874451273,
      "grad_norm": 0.09421882778406143,
      "learning_rate": 2.1949078138718175e-05,
      "loss": 0.0004,
      "step": 3834
    },
    {
      "epoch": 1.1223295288264559,
      "grad_norm": 0.0010368202347308397,
      "learning_rate": 2.1941761779338603e-05,
      "loss": 0.0,
      "step": 3835
    },
    {
      "epoch": 1.122622183201639,
      "grad_norm": 0.0008518972317688167,
      "learning_rate": 2.1934445419959028e-05,
      "loss": 0.0,
      "step": 3836
    },
    {
      "epoch": 1.1229148375768219,
      "grad_norm": 0.0005097311805002391,
      "learning_rate": 2.1927129060579456e-05,
      "loss": 0.0,
      "step": 3837
    },
    {
      "epoch": 1.1232074919520048,
      "grad_norm": 0.0004974879557266831,
      "learning_rate": 2.1919812701199884e-05,
      "loss": 0.0,
      "step": 3838
    },
    {
      "epoch": 1.1235001463271876,
      "grad_norm": 0.0005729788681492209,
      "learning_rate": 2.191249634182031e-05,
      "loss": 0.0,
      "step": 3839
    },
    {
      "epoch": 1.1237928007023705,
      "grad_norm": 0.0005809550639241934,
      "learning_rate": 2.1905179982440736e-05,
      "loss": 0.0,
      "step": 3840
    },
    {
      "epoch": 1.1240854550775534,
      "grad_norm": 0.00042159974691458046,
      "learning_rate": 2.1897863623061164e-05,
      "loss": 0.0,
      "step": 3841
    },
    {
      "epoch": 1.1243781094527363,
      "grad_norm": 0.00027188719832338393,
      "learning_rate": 2.1890547263681592e-05,
      "loss": 0.0,
      "step": 3842
    },
    {
      "epoch": 1.1246707638279192,
      "grad_norm": 0.0004589363234117627,
      "learning_rate": 2.188323090430202e-05,
      "loss": 0.0,
      "step": 3843
    },
    {
      "epoch": 1.124963418203102,
      "grad_norm": 0.0006850520730949938,
      "learning_rate": 2.1875914544922448e-05,
      "loss": 0.0,
      "step": 3844
    },
    {
      "epoch": 1.125256072578285,
      "grad_norm": 0.0022457025479525328,
      "learning_rate": 2.1868598185542873e-05,
      "loss": 0.0,
      "step": 3845
    },
    {
      "epoch": 1.1255487269534679,
      "grad_norm": 0.0006316835060715675,
      "learning_rate": 2.18612818261633e-05,
      "loss": 0.0,
      "step": 3846
    },
    {
      "epoch": 1.1258413813286507,
      "grad_norm": 0.00030914306989870965,
      "learning_rate": 2.185396546678373e-05,
      "loss": 0.0,
      "step": 3847
    },
    {
      "epoch": 1.1261340357038339,
      "grad_norm": 0.00035717347054742277,
      "learning_rate": 2.1846649107404156e-05,
      "loss": 0.0,
      "step": 3848
    },
    {
      "epoch": 1.1264266900790167,
      "grad_norm": 0.0013712747022509575,
      "learning_rate": 2.1839332748024584e-05,
      "loss": 0.0,
      "step": 3849
    },
    {
      "epoch": 1.1267193444541996,
      "grad_norm": 0.0019398657605051994,
      "learning_rate": 2.183201638864501e-05,
      "loss": 0.0,
      "step": 3850
    },
    {
      "epoch": 1.1270119988293825,
      "grad_norm": 8.307303428649902,
      "learning_rate": 2.1824700029265437e-05,
      "loss": 0.0894,
      "step": 3851
    },
    {
      "epoch": 1.1273046532045654,
      "grad_norm": 3.9371161460876465,
      "learning_rate": 2.1817383669885865e-05,
      "loss": 0.0099,
      "step": 3852
    },
    {
      "epoch": 1.1275973075797483,
      "grad_norm": 0.000982138211838901,
      "learning_rate": 2.1810067310506293e-05,
      "loss": 0.0,
      "step": 3853
    },
    {
      "epoch": 1.1278899619549312,
      "grad_norm": 0.0011270848335698247,
      "learning_rate": 2.180275095112672e-05,
      "loss": 0.0,
      "step": 3854
    },
    {
      "epoch": 1.128182616330114,
      "grad_norm": 0.1403433382511139,
      "learning_rate": 2.1795434591747145e-05,
      "loss": 0.0003,
      "step": 3855
    },
    {
      "epoch": 1.128475270705297,
      "grad_norm": 0.014551272615790367,
      "learning_rate": 2.1788118232367573e-05,
      "loss": 0.0001,
      "step": 3856
    },
    {
      "epoch": 1.12876792508048,
      "grad_norm": 12.923527717590332,
      "learning_rate": 2.1780801872988e-05,
      "loss": 0.0838,
      "step": 3857
    },
    {
      "epoch": 1.129060579455663,
      "grad_norm": 0.000964752456638962,
      "learning_rate": 2.177348551360843e-05,
      "loss": 0.0,
      "step": 3858
    },
    {
      "epoch": 1.1293532338308458,
      "grad_norm": 0.0010205963626503944,
      "learning_rate": 2.1766169154228857e-05,
      "loss": 0.0,
      "step": 3859
    },
    {
      "epoch": 1.1296458882060287,
      "grad_norm": 0.0003431029326748103,
      "learning_rate": 2.1758852794849285e-05,
      "loss": 0.0,
      "step": 3860
    },
    {
      "epoch": 1.1299385425812116,
      "grad_norm": 0.00042271107668057084,
      "learning_rate": 2.175153643546971e-05,
      "loss": 0.0,
      "step": 3861
    },
    {
      "epoch": 1.1302311969563945,
      "grad_norm": 0.0009413016960024834,
      "learning_rate": 2.1744220076090138e-05,
      "loss": 0.0,
      "step": 3862
    },
    {
      "epoch": 1.1305238513315774,
      "grad_norm": 0.00035180142731405795,
      "learning_rate": 2.1736903716710565e-05,
      "loss": 0.0,
      "step": 3863
    },
    {
      "epoch": 1.1308165057067603,
      "grad_norm": 0.0008406572160311043,
      "learning_rate": 2.1729587357330993e-05,
      "loss": 0.0,
      "step": 3864
    },
    {
      "epoch": 1.1311091600819432,
      "grad_norm": 0.0013551930896937847,
      "learning_rate": 2.172227099795142e-05,
      "loss": 0.0,
      "step": 3865
    },
    {
      "epoch": 1.131401814457126,
      "grad_norm": 0.0004497410263866186,
      "learning_rate": 2.1714954638571846e-05,
      "loss": 0.0,
      "step": 3866
    },
    {
      "epoch": 1.131694468832309,
      "grad_norm": 0.00132949638646096,
      "learning_rate": 2.1707638279192274e-05,
      "loss": 0.0,
      "step": 3867
    },
    {
      "epoch": 1.1319871232074918,
      "grad_norm": 0.0037383574526757,
      "learning_rate": 2.1700321919812702e-05,
      "loss": 0.0001,
      "step": 3868
    },
    {
      "epoch": 1.132279777582675,
      "grad_norm": 0.001089840428903699,
      "learning_rate": 2.169300556043313e-05,
      "loss": 0.0,
      "step": 3869
    },
    {
      "epoch": 1.1325724319578578,
      "grad_norm": 0.0005344722303561866,
      "learning_rate": 2.1685689201053558e-05,
      "loss": 0.0,
      "step": 3870
    },
    {
      "epoch": 1.1328650863330407,
      "grad_norm": 0.001129949581809342,
      "learning_rate": 2.1678372841673982e-05,
      "loss": 0.0,
      "step": 3871
    },
    {
      "epoch": 1.1331577407082236,
      "grad_norm": 0.001591612584888935,
      "learning_rate": 2.167105648229441e-05,
      "loss": 0.0,
      "step": 3872
    },
    {
      "epoch": 1.1334503950834065,
      "grad_norm": 0.0008599033462814987,
      "learning_rate": 2.1663740122914838e-05,
      "loss": 0.0,
      "step": 3873
    },
    {
      "epoch": 1.1337430494585894,
      "grad_norm": 0.0004773321852553636,
      "learning_rate": 2.1656423763535266e-05,
      "loss": 0.0,
      "step": 3874
    },
    {
      "epoch": 1.1340357038337723,
      "grad_norm": 0.0018014417728409171,
      "learning_rate": 2.1649107404155694e-05,
      "loss": 0.0,
      "step": 3875
    },
    {
      "epoch": 1.1343283582089552,
      "grad_norm": 0.0020266417413949966,
      "learning_rate": 2.164179104477612e-05,
      "loss": 0.0,
      "step": 3876
    },
    {
      "epoch": 1.134621012584138,
      "grad_norm": 0.21160373091697693,
      "learning_rate": 2.1634474685396547e-05,
      "loss": 0.0011,
      "step": 3877
    },
    {
      "epoch": 1.1349136669593212,
      "grad_norm": 0.01681935228407383,
      "learning_rate": 2.1627158326016975e-05,
      "loss": 0.0001,
      "step": 3878
    },
    {
      "epoch": 1.135206321334504,
      "grad_norm": 0.0005730148404836655,
      "learning_rate": 2.1619841966637403e-05,
      "loss": 0.0,
      "step": 3879
    },
    {
      "epoch": 1.135498975709687,
      "grad_norm": 0.0013979710638523102,
      "learning_rate": 2.161252560725783e-05,
      "loss": 0.0,
      "step": 3880
    },
    {
      "epoch": 1.1357916300848698,
      "grad_norm": 0.0007042467477731407,
      "learning_rate": 2.160520924787826e-05,
      "loss": 0.0,
      "step": 3881
    },
    {
      "epoch": 1.1360842844600527,
      "grad_norm": 0.0012201687786728144,
      "learning_rate": 2.1597892888498683e-05,
      "loss": 0.0,
      "step": 3882
    },
    {
      "epoch": 1.1363769388352356,
      "grad_norm": 0.000890715979039669,
      "learning_rate": 2.159057652911911e-05,
      "loss": 0.0,
      "step": 3883
    },
    {
      "epoch": 1.1366695932104185,
      "grad_norm": 0.0024068302009254694,
      "learning_rate": 2.158326016973954e-05,
      "loss": 0.0,
      "step": 3884
    },
    {
      "epoch": 1.1369622475856014,
      "grad_norm": 0.0013551790034398437,
      "learning_rate": 2.1575943810359967e-05,
      "loss": 0.0,
      "step": 3885
    },
    {
      "epoch": 1.1372549019607843,
      "grad_norm": 0.055313825607299805,
      "learning_rate": 2.1568627450980395e-05,
      "loss": 0.0002,
      "step": 3886
    },
    {
      "epoch": 1.1375475563359672,
      "grad_norm": 0.0012204163940623403,
      "learning_rate": 2.156131109160082e-05,
      "loss": 0.0,
      "step": 3887
    },
    {
      "epoch": 1.13784021071115,
      "grad_norm": 0.0007513660821132362,
      "learning_rate": 2.1553994732221247e-05,
      "loss": 0.0,
      "step": 3888
    },
    {
      "epoch": 1.138132865086333,
      "grad_norm": 0.00091296446043998,
      "learning_rate": 2.1546678372841675e-05,
      "loss": 0.0,
      "step": 3889
    },
    {
      "epoch": 1.138425519461516,
      "grad_norm": 0.0008509410545229912,
      "learning_rate": 2.1539362013462103e-05,
      "loss": 0.0,
      "step": 3890
    },
    {
      "epoch": 1.138718173836699,
      "grad_norm": 0.0007758307037875056,
      "learning_rate": 2.153204565408253e-05,
      "loss": 0.0,
      "step": 3891
    },
    {
      "epoch": 1.1390108282118818,
      "grad_norm": 0.0006105030188336968,
      "learning_rate": 2.1524729294702956e-05,
      "loss": 0.0,
      "step": 3892
    },
    {
      "epoch": 1.1393034825870647,
      "grad_norm": 0.00399687048047781,
      "learning_rate": 2.1517412935323384e-05,
      "loss": 0.0,
      "step": 3893
    },
    {
      "epoch": 1.1395961369622476,
      "grad_norm": 0.6638000011444092,
      "learning_rate": 2.151009657594381e-05,
      "loss": 0.0009,
      "step": 3894
    },
    {
      "epoch": 1.1398887913374305,
      "grad_norm": 0.0012894651154056191,
      "learning_rate": 2.150278021656424e-05,
      "loss": 0.0,
      "step": 3895
    },
    {
      "epoch": 1.1401814457126134,
      "grad_norm": 0.0012685739202424884,
      "learning_rate": 2.1495463857184668e-05,
      "loss": 0.0,
      "step": 3896
    },
    {
      "epoch": 1.1404741000877963,
      "grad_norm": 0.00038186321035027504,
      "learning_rate": 2.1488147497805095e-05,
      "loss": 0.0,
      "step": 3897
    },
    {
      "epoch": 1.1407667544629791,
      "grad_norm": 0.005274210590869188,
      "learning_rate": 2.148083113842552e-05,
      "loss": 0.0001,
      "step": 3898
    },
    {
      "epoch": 1.141059408838162,
      "grad_norm": 0.0008092095959000289,
      "learning_rate": 2.1473514779045948e-05,
      "loss": 0.0,
      "step": 3899
    },
    {
      "epoch": 1.1413520632133451,
      "grad_norm": 0.002121126279234886,
      "learning_rate": 2.1466198419666376e-05,
      "loss": 0.0,
      "step": 3900
    },
    {
      "epoch": 1.141644717588528,
      "grad_norm": 0.00620256457477808,
      "learning_rate": 2.1458882060286804e-05,
      "loss": 0.0001,
      "step": 3901
    },
    {
      "epoch": 1.141937371963711,
      "grad_norm": 0.0012706911657005548,
      "learning_rate": 2.1451565700907232e-05,
      "loss": 0.0,
      "step": 3902
    },
    {
      "epoch": 1.1422300263388938,
      "grad_norm": 0.0016427603550255299,
      "learning_rate": 2.1444249341527656e-05,
      "loss": 0.0,
      "step": 3903
    },
    {
      "epoch": 1.1425226807140767,
      "grad_norm": 0.004213469102978706,
      "learning_rate": 2.1436932982148084e-05,
      "loss": 0.0001,
      "step": 3904
    },
    {
      "epoch": 1.1428153350892596,
      "grad_norm": 0.003851081943139434,
      "learning_rate": 2.1429616622768512e-05,
      "loss": 0.0001,
      "step": 3905
    },
    {
      "epoch": 1.1431079894644425,
      "grad_norm": 0.0013577681966125965,
      "learning_rate": 2.142230026338894e-05,
      "loss": 0.0,
      "step": 3906
    },
    {
      "epoch": 1.1434006438396254,
      "grad_norm": 0.0006001973524689674,
      "learning_rate": 2.1414983904009368e-05,
      "loss": 0.0,
      "step": 3907
    },
    {
      "epoch": 1.1436932982148083,
      "grad_norm": 0.002575696911662817,
      "learning_rate": 2.1407667544629793e-05,
      "loss": 0.0,
      "step": 3908
    },
    {
      "epoch": 1.1439859525899911,
      "grad_norm": 0.0007619388052262366,
      "learning_rate": 2.140035118525022e-05,
      "loss": 0.0,
      "step": 3909
    },
    {
      "epoch": 1.144278606965174,
      "grad_norm": 0.0007594061316922307,
      "learning_rate": 2.139303482587065e-05,
      "loss": 0.0,
      "step": 3910
    },
    {
      "epoch": 1.144571261340357,
      "grad_norm": 0.035657331347465515,
      "learning_rate": 2.1385718466491077e-05,
      "loss": 0.0002,
      "step": 3911
    },
    {
      "epoch": 1.14486391571554,
      "grad_norm": 0.0011063236743211746,
      "learning_rate": 2.1378402107111505e-05,
      "loss": 0.0,
      "step": 3912
    },
    {
      "epoch": 1.145156570090723,
      "grad_norm": 0.0025668165180832148,
      "learning_rate": 2.1371085747731933e-05,
      "loss": 0.0,
      "step": 3913
    },
    {
      "epoch": 1.1454492244659058,
      "grad_norm": 0.0062032961286604404,
      "learning_rate": 2.1363769388352357e-05,
      "loss": 0.0001,
      "step": 3914
    },
    {
      "epoch": 1.1457418788410887,
      "grad_norm": 0.01251330878585577,
      "learning_rate": 2.1356453028972785e-05,
      "loss": 0.0001,
      "step": 3915
    },
    {
      "epoch": 1.1460345332162716,
      "grad_norm": 0.0013093686429783702,
      "learning_rate": 2.1349136669593213e-05,
      "loss": 0.0,
      "step": 3916
    },
    {
      "epoch": 1.1463271875914545,
      "grad_norm": 0.0021693778689950705,
      "learning_rate": 2.134182031021364e-05,
      "loss": 0.0,
      "step": 3917
    },
    {
      "epoch": 1.1466198419666374,
      "grad_norm": 0.056038640439510345,
      "learning_rate": 2.1334503950834065e-05,
      "loss": 0.0003,
      "step": 3918
    },
    {
      "epoch": 1.1469124963418202,
      "grad_norm": 0.0013667010935023427,
      "learning_rate": 2.1327187591454493e-05,
      "loss": 0.0,
      "step": 3919
    },
    {
      "epoch": 1.1472051507170031,
      "grad_norm": 0.0008248764206655324,
      "learning_rate": 2.131987123207492e-05,
      "loss": 0.0,
      "step": 3920
    },
    {
      "epoch": 1.1474978050921862,
      "grad_norm": 0.0006004977622069418,
      "learning_rate": 2.131255487269535e-05,
      "loss": 0.0,
      "step": 3921
    },
    {
      "epoch": 1.1477904594673691,
      "grad_norm": 0.0019180545350536704,
      "learning_rate": 2.1305238513315774e-05,
      "loss": 0.0,
      "step": 3922
    },
    {
      "epoch": 1.148083113842552,
      "grad_norm": 0.0006675656768493354,
      "learning_rate": 2.1297922153936202e-05,
      "loss": 0.0,
      "step": 3923
    },
    {
      "epoch": 1.148375768217735,
      "grad_norm": 0.002919198013842106,
      "learning_rate": 2.129060579455663e-05,
      "loss": 0.0,
      "step": 3924
    },
    {
      "epoch": 1.1486684225929178,
      "grad_norm": 0.0006825899472460151,
      "learning_rate": 2.1283289435177058e-05,
      "loss": 0.0,
      "step": 3925
    },
    {
      "epoch": 1.1489610769681007,
      "grad_norm": 0.0016971147852018476,
      "learning_rate": 2.1275973075797486e-05,
      "loss": 0.0,
      "step": 3926
    },
    {
      "epoch": 1.1492537313432836,
      "grad_norm": 0.0005503272986970842,
      "learning_rate": 2.126865671641791e-05,
      "loss": 0.0,
      "step": 3927
    },
    {
      "epoch": 1.1495463857184665,
      "grad_norm": 0.00199718214571476,
      "learning_rate": 2.1261340357038338e-05,
      "loss": 0.0,
      "step": 3928
    },
    {
      "epoch": 1.1498390400936493,
      "grad_norm": 0.0035684986505657434,
      "learning_rate": 2.1254023997658766e-05,
      "loss": 0.0001,
      "step": 3929
    },
    {
      "epoch": 1.1501316944688322,
      "grad_norm": 0.0018766399007290602,
      "learning_rate": 2.1246707638279194e-05,
      "loss": 0.0,
      "step": 3930
    },
    {
      "epoch": 1.1504243488440151,
      "grad_norm": 0.001120618311688304,
      "learning_rate": 2.123939127889962e-05,
      "loss": 0.0,
      "step": 3931
    },
    {
      "epoch": 1.150717003219198,
      "grad_norm": 0.0007104809628799558,
      "learning_rate": 2.1232074919520047e-05,
      "loss": 0.0,
      "step": 3932
    },
    {
      "epoch": 1.1510096575943811,
      "grad_norm": 0.0005078423419035971,
      "learning_rate": 2.1224758560140475e-05,
      "loss": 0.0,
      "step": 3933
    },
    {
      "epoch": 1.151302311969564,
      "grad_norm": 0.0006699707009829581,
      "learning_rate": 2.1217442200760903e-05,
      "loss": 0.0,
      "step": 3934
    },
    {
      "epoch": 1.151594966344747,
      "grad_norm": 0.0008183489553630352,
      "learning_rate": 2.1210125841381327e-05,
      "loss": 0.0,
      "step": 3935
    },
    {
      "epoch": 1.1518876207199298,
      "grad_norm": 0.0010990456212311983,
      "learning_rate": 2.1202809482001755e-05,
      "loss": 0.0,
      "step": 3936
    },
    {
      "epoch": 1.1521802750951127,
      "grad_norm": 0.0018640455091372132,
      "learning_rate": 2.1195493122622183e-05,
      "loss": 0.0,
      "step": 3937
    },
    {
      "epoch": 1.1524729294702956,
      "grad_norm": 0.002578464802354574,
      "learning_rate": 2.118817676324261e-05,
      "loss": 0.0,
      "step": 3938
    },
    {
      "epoch": 1.1527655838454784,
      "grad_norm": 0.0022473332937806845,
      "learning_rate": 2.118086040386304e-05,
      "loss": 0.0,
      "step": 3939
    },
    {
      "epoch": 1.1530582382206613,
      "grad_norm": 0.014399176463484764,
      "learning_rate": 2.1173544044483463e-05,
      "loss": 0.0002,
      "step": 3940
    },
    {
      "epoch": 1.1533508925958442,
      "grad_norm": 0.0022750303614884615,
      "learning_rate": 2.116622768510389e-05,
      "loss": 0.0,
      "step": 3941
    },
    {
      "epoch": 1.1536435469710273,
      "grad_norm": 0.0017155127134174109,
      "learning_rate": 2.115891132572432e-05,
      "loss": 0.0,
      "step": 3942
    },
    {
      "epoch": 1.1539362013462102,
      "grad_norm": 0.0037041513714939356,
      "learning_rate": 2.1151594966344747e-05,
      "loss": 0.0,
      "step": 3943
    },
    {
      "epoch": 1.154228855721393,
      "grad_norm": 0.0008389271097257733,
      "learning_rate": 2.1144278606965175e-05,
      "loss": 0.0,
      "step": 3944
    },
    {
      "epoch": 1.154521510096576,
      "grad_norm": 0.0014524850994348526,
      "learning_rate": 2.11369622475856e-05,
      "loss": 0.0,
      "step": 3945
    },
    {
      "epoch": 1.1548141644717589,
      "grad_norm": 0.059136826545000076,
      "learning_rate": 2.1129645888206028e-05,
      "loss": 0.0003,
      "step": 3946
    },
    {
      "epoch": 1.1551068188469418,
      "grad_norm": 0.0016152020543813705,
      "learning_rate": 2.1122329528826456e-05,
      "loss": 0.0,
      "step": 3947
    },
    {
      "epoch": 1.1553994732221247,
      "grad_norm": 0.0002280518237967044,
      "learning_rate": 2.1115013169446884e-05,
      "loss": 0.0,
      "step": 3948
    },
    {
      "epoch": 1.1556921275973076,
      "grad_norm": 0.0015034673269838095,
      "learning_rate": 2.110769681006731e-05,
      "loss": 0.0,
      "step": 3949
    },
    {
      "epoch": 1.1559847819724904,
      "grad_norm": 0.003323787124827504,
      "learning_rate": 2.110038045068774e-05,
      "loss": 0.0001,
      "step": 3950
    },
    {
      "epoch": 1.1562774363476733,
      "grad_norm": 0.0010264019947499037,
      "learning_rate": 2.1093064091308164e-05,
      "loss": 0.0,
      "step": 3951
    },
    {
      "epoch": 1.1565700907228562,
      "grad_norm": 0.0008539935224689543,
      "learning_rate": 2.1085747731928592e-05,
      "loss": 0.0,
      "step": 3952
    },
    {
      "epoch": 1.156862745098039,
      "grad_norm": 0.0005931356572546065,
      "learning_rate": 2.107843137254902e-05,
      "loss": 0.0,
      "step": 3953
    },
    {
      "epoch": 1.1571553994732222,
      "grad_norm": 0.001304261269979179,
      "learning_rate": 2.1071115013169448e-05,
      "loss": 0.0,
      "step": 3954
    },
    {
      "epoch": 1.157448053848405,
      "grad_norm": 0.0005458810483105481,
      "learning_rate": 2.1063798653789876e-05,
      "loss": 0.0,
      "step": 3955
    },
    {
      "epoch": 1.157740708223588,
      "grad_norm": 0.0012579706963151693,
      "learning_rate": 2.10564822944103e-05,
      "loss": 0.0,
      "step": 3956
    },
    {
      "epoch": 1.1580333625987709,
      "grad_norm": 0.0019264259608462453,
      "learning_rate": 2.104916593503073e-05,
      "loss": 0.0,
      "step": 3957
    },
    {
      "epoch": 1.1583260169739538,
      "grad_norm": 0.0005996621330268681,
      "learning_rate": 2.1041849575651156e-05,
      "loss": 0.0,
      "step": 3958
    },
    {
      "epoch": 1.1586186713491367,
      "grad_norm": 0.000745131925214082,
      "learning_rate": 2.1034533216271584e-05,
      "loss": 0.0,
      "step": 3959
    },
    {
      "epoch": 1.1589113257243195,
      "grad_norm": 0.05781120806932449,
      "learning_rate": 2.1027216856892012e-05,
      "loss": 0.0003,
      "step": 3960
    },
    {
      "epoch": 1.1592039800995024,
      "grad_norm": 0.002012160373851657,
      "learning_rate": 2.1019900497512437e-05,
      "loss": 0.0,
      "step": 3961
    },
    {
      "epoch": 1.1594966344746853,
      "grad_norm": 0.007525267545133829,
      "learning_rate": 2.1012584138132865e-05,
      "loss": 0.0001,
      "step": 3962
    },
    {
      "epoch": 1.1597892888498684,
      "grad_norm": 0.0028305898886173964,
      "learning_rate": 2.1005267778753293e-05,
      "loss": 0.0,
      "step": 3963
    },
    {
      "epoch": 1.1600819432250513,
      "grad_norm": 0.004379513207823038,
      "learning_rate": 2.099795141937372e-05,
      "loss": 0.0,
      "step": 3964
    },
    {
      "epoch": 1.1603745976002342,
      "grad_norm": 0.0022841941099613905,
      "learning_rate": 2.099063505999415e-05,
      "loss": 0.0,
      "step": 3965
    },
    {
      "epoch": 1.160667251975417,
      "grad_norm": 0.00092789635527879,
      "learning_rate": 2.0983318700614577e-05,
      "loss": 0.0,
      "step": 3966
    },
    {
      "epoch": 1.1609599063506,
      "grad_norm": 0.0009375543450005352,
      "learning_rate": 2.0976002341235e-05,
      "loss": 0.0,
      "step": 3967
    },
    {
      "epoch": 1.1612525607257829,
      "grad_norm": 0.013091144151985645,
      "learning_rate": 2.096868598185543e-05,
      "loss": 0.0001,
      "step": 3968
    },
    {
      "epoch": 1.1615452151009658,
      "grad_norm": 0.00041092262836173177,
      "learning_rate": 2.0961369622475857e-05,
      "loss": 0.0,
      "step": 3969
    },
    {
      "epoch": 1.1618378694761486,
      "grad_norm": 0.00044362861081026495,
      "learning_rate": 2.0954053263096285e-05,
      "loss": 0.0,
      "step": 3970
    },
    {
      "epoch": 1.1621305238513315,
      "grad_norm": 0.0012614787556231022,
      "learning_rate": 2.0946736903716713e-05,
      "loss": 0.0,
      "step": 3971
    },
    {
      "epoch": 1.1624231782265144,
      "grad_norm": 0.0003866611805278808,
      "learning_rate": 2.0939420544337138e-05,
      "loss": 0.0,
      "step": 3972
    },
    {
      "epoch": 1.1627158326016973,
      "grad_norm": 0.0011134531814604998,
      "learning_rate": 2.0932104184957565e-05,
      "loss": 0.0,
      "step": 3973
    },
    {
      "epoch": 1.1630084869768802,
      "grad_norm": 0.0004822705523110926,
      "learning_rate": 2.0924787825577993e-05,
      "loss": 0.0,
      "step": 3974
    },
    {
      "epoch": 1.1633011413520633,
      "grad_norm": 0.00044631559285335243,
      "learning_rate": 2.091747146619842e-05,
      "loss": 0.0,
      "step": 3975
    },
    {
      "epoch": 1.1635937957272462,
      "grad_norm": 0.0002210295933764428,
      "learning_rate": 2.091015510681885e-05,
      "loss": 0.0,
      "step": 3976
    },
    {
      "epoch": 1.163886450102429,
      "grad_norm": 0.0007133566541597247,
      "learning_rate": 2.0902838747439274e-05,
      "loss": 0.0,
      "step": 3977
    },
    {
      "epoch": 1.164179104477612,
      "grad_norm": 0.003904839511960745,
      "learning_rate": 2.0895522388059702e-05,
      "loss": 0.0,
      "step": 3978
    },
    {
      "epoch": 1.1644717588527949,
      "grad_norm": 0.00019842377514578402,
      "learning_rate": 2.088820602868013e-05,
      "loss": 0.0,
      "step": 3979
    },
    {
      "epoch": 1.1647644132279777,
      "grad_norm": 0.0004171247419435531,
      "learning_rate": 2.0880889669300558e-05,
      "loss": 0.0,
      "step": 3980
    },
    {
      "epoch": 1.1650570676031606,
      "grad_norm": 0.0003570810076780617,
      "learning_rate": 2.0873573309920986e-05,
      "loss": 0.0,
      "step": 3981
    },
    {
      "epoch": 1.1653497219783435,
      "grad_norm": 0.00041155374492518604,
      "learning_rate": 2.0866256950541414e-05,
      "loss": 0.0,
      "step": 3982
    },
    {
      "epoch": 1.1656423763535264,
      "grad_norm": 0.00036599207669496536,
      "learning_rate": 2.0858940591161838e-05,
      "loss": 0.0,
      "step": 3983
    },
    {
      "epoch": 1.1659350307287093,
      "grad_norm": 0.0005415352643467486,
      "learning_rate": 2.0851624231782266e-05,
      "loss": 0.0,
      "step": 3984
    },
    {
      "epoch": 1.1662276851038924,
      "grad_norm": 0.00030228469404391944,
      "learning_rate": 2.0844307872402694e-05,
      "loss": 0.0,
      "step": 3985
    },
    {
      "epoch": 1.1665203394790753,
      "grad_norm": 0.0003409007331356406,
      "learning_rate": 2.0836991513023122e-05,
      "loss": 0.0,
      "step": 3986
    },
    {
      "epoch": 1.1668129938542582,
      "grad_norm": 0.000449575629318133,
      "learning_rate": 2.082967515364355e-05,
      "loss": 0.0,
      "step": 3987
    },
    {
      "epoch": 1.167105648229441,
      "grad_norm": 0.00036963477032259107,
      "learning_rate": 2.0822358794263975e-05,
      "loss": 0.0,
      "step": 3988
    },
    {
      "epoch": 1.167398302604624,
      "grad_norm": 0.0006797259557060897,
      "learning_rate": 2.0815042434884403e-05,
      "loss": 0.0,
      "step": 3989
    },
    {
      "epoch": 1.1676909569798068,
      "grad_norm": 0.0004133189213462174,
      "learning_rate": 2.080772607550483e-05,
      "loss": 0.0,
      "step": 3990
    },
    {
      "epoch": 1.1679836113549897,
      "grad_norm": 0.0008724583312869072,
      "learning_rate": 2.080040971612526e-05,
      "loss": 0.0,
      "step": 3991
    },
    {
      "epoch": 1.1682762657301726,
      "grad_norm": 0.00334708159789443,
      "learning_rate": 2.0793093356745686e-05,
      "loss": 0.0,
      "step": 3992
    },
    {
      "epoch": 1.1685689201053555,
      "grad_norm": 0.00032076481147669256,
      "learning_rate": 2.078577699736611e-05,
      "loss": 0.0,
      "step": 3993
    },
    {
      "epoch": 1.1688615744805384,
      "grad_norm": 0.00036713696317747235,
      "learning_rate": 2.077846063798654e-05,
      "loss": 0.0,
      "step": 3994
    },
    {
      "epoch": 1.1691542288557213,
      "grad_norm": 0.0003647505945991725,
      "learning_rate": 2.0771144278606967e-05,
      "loss": 0.0,
      "step": 3995
    },
    {
      "epoch": 1.1694468832309042,
      "grad_norm": 0.00010544279939495027,
      "learning_rate": 2.0763827919227395e-05,
      "loss": 0.0,
      "step": 3996
    },
    {
      "epoch": 1.1697395376060873,
      "grad_norm": 0.0005103484727442265,
      "learning_rate": 2.0756511559847823e-05,
      "loss": 0.0,
      "step": 3997
    },
    {
      "epoch": 1.1700321919812702,
      "grad_norm": 0.0012317753862589598,
      "learning_rate": 2.0749195200468247e-05,
      "loss": 0.0,
      "step": 3998
    },
    {
      "epoch": 1.170324846356453,
      "grad_norm": 0.006900831591337919,
      "learning_rate": 2.0741878841088675e-05,
      "loss": 0.0,
      "step": 3999
    },
    {
      "epoch": 1.170617500731636,
      "grad_norm": 0.0020533285569399595,
      "learning_rate": 2.0734562481709103e-05,
      "loss": 0.0,
      "step": 4000
    },
    {
      "epoch": 1.1709101551068188,
      "grad_norm": 0.0004798827867489308,
      "learning_rate": 2.072724612232953e-05,
      "loss": 0.0,
      "step": 4001
    },
    {
      "epoch": 1.1712028094820017,
      "grad_norm": 0.00036796287167817354,
      "learning_rate": 2.071992976294996e-05,
      "loss": 0.0,
      "step": 4002
    },
    {
      "epoch": 1.1714954638571846,
      "grad_norm": 0.020118188112974167,
      "learning_rate": 2.0712613403570387e-05,
      "loss": 0.0001,
      "step": 4003
    },
    {
      "epoch": 1.1717881182323675,
      "grad_norm": 0.0013013577554374933,
      "learning_rate": 2.070529704419081e-05,
      "loss": 0.0,
      "step": 4004
    },
    {
      "epoch": 1.1720807726075504,
      "grad_norm": 0.00038200916606001556,
      "learning_rate": 2.069798068481124e-05,
      "loss": 0.0,
      "step": 4005
    },
    {
      "epoch": 1.1723734269827335,
      "grad_norm": 0.0011500732507556677,
      "learning_rate": 2.0690664325431668e-05,
      "loss": 0.0,
      "step": 4006
    },
    {
      "epoch": 1.1726660813579164,
      "grad_norm": 0.00043047755025327206,
      "learning_rate": 2.0683347966052095e-05,
      "loss": 0.0,
      "step": 4007
    },
    {
      "epoch": 1.1729587357330993,
      "grad_norm": 0.0002854850608855486,
      "learning_rate": 2.0676031606672523e-05,
      "loss": 0.0,
      "step": 4008
    },
    {
      "epoch": 1.1732513901082822,
      "grad_norm": 0.0002904585562646389,
      "learning_rate": 2.0668715247292948e-05,
      "loss": 0.0,
      "step": 4009
    },
    {
      "epoch": 1.173544044483465,
      "grad_norm": 0.0018572751432657242,
      "learning_rate": 2.0661398887913376e-05,
      "loss": 0.0,
      "step": 4010
    },
    {
      "epoch": 1.173836698858648,
      "grad_norm": 0.00020035271882079542,
      "learning_rate": 2.0654082528533804e-05,
      "loss": 0.0,
      "step": 4011
    },
    {
      "epoch": 1.1741293532338308,
      "grad_norm": 0.0010458094766363502,
      "learning_rate": 2.0646766169154232e-05,
      "loss": 0.0,
      "step": 4012
    },
    {
      "epoch": 1.1744220076090137,
      "grad_norm": 0.000360166362952441,
      "learning_rate": 2.0639449809774656e-05,
      "loss": 0.0,
      "step": 4013
    },
    {
      "epoch": 1.1747146619841966,
      "grad_norm": 0.0004725328180938959,
      "learning_rate": 2.0632133450395084e-05,
      "loss": 0.0,
      "step": 4014
    },
    {
      "epoch": 1.1750073163593795,
      "grad_norm": 0.00040227134013548493,
      "learning_rate": 2.0624817091015512e-05,
      "loss": 0.0,
      "step": 4015
    },
    {
      "epoch": 1.1752999707345624,
      "grad_norm": 0.00047024450032040477,
      "learning_rate": 2.061750073163594e-05,
      "loss": 0.0,
      "step": 4016
    },
    {
      "epoch": 1.1755926251097453,
      "grad_norm": 0.0002049401227850467,
      "learning_rate": 2.0610184372256365e-05,
      "loss": 0.0,
      "step": 4017
    },
    {
      "epoch": 1.1758852794849284,
      "grad_norm": 0.00016430718824267387,
      "learning_rate": 2.0602868012876793e-05,
      "loss": 0.0,
      "step": 4018
    },
    {
      "epoch": 1.1761779338601113,
      "grad_norm": 0.00045258854515850544,
      "learning_rate": 2.059555165349722e-05,
      "loss": 0.0,
      "step": 4019
    },
    {
      "epoch": 1.1764705882352942,
      "grad_norm": 0.0007949948194436729,
      "learning_rate": 2.058823529411765e-05,
      "loss": 0.0,
      "step": 4020
    },
    {
      "epoch": 1.176763242610477,
      "grad_norm": 0.0004329327493906021,
      "learning_rate": 2.0580918934738073e-05,
      "loss": 0.0,
      "step": 4021
    },
    {
      "epoch": 1.17705589698566,
      "grad_norm": 0.000122263198136352,
      "learning_rate": 2.05736025753585e-05,
      "loss": 0.0,
      "step": 4022
    },
    {
      "epoch": 1.1773485513608428,
      "grad_norm": 0.00021170203399378806,
      "learning_rate": 2.056628621597893e-05,
      "loss": 0.0,
      "step": 4023
    },
    {
      "epoch": 1.1776412057360257,
      "grad_norm": 0.0002878759987652302,
      "learning_rate": 2.0558969856599357e-05,
      "loss": 0.0,
      "step": 4024
    },
    {
      "epoch": 1.1779338601112086,
      "grad_norm": 0.0004215857479721308,
      "learning_rate": 2.055165349721978e-05,
      "loss": 0.0,
      "step": 4025
    },
    {
      "epoch": 1.1782265144863915,
      "grad_norm": 0.00035516414209268987,
      "learning_rate": 2.054433713784021e-05,
      "loss": 0.0,
      "step": 4026
    },
    {
      "epoch": 1.1785191688615746,
      "grad_norm": 0.00024092184321489185,
      "learning_rate": 2.0537020778460638e-05,
      "loss": 0.0,
      "step": 4027
    },
    {
      "epoch": 1.1788118232367575,
      "grad_norm": 0.0005252889823168516,
      "learning_rate": 2.0529704419081065e-05,
      "loss": 0.0,
      "step": 4028
    },
    {
      "epoch": 1.1791044776119404,
      "grad_norm": 0.000901224440895021,
      "learning_rate": 2.0522388059701493e-05,
      "loss": 0.0,
      "step": 4029
    },
    {
      "epoch": 1.1793971319871233,
      "grad_norm": 0.0007788223447278142,
      "learning_rate": 2.0515071700321918e-05,
      "loss": 0.0,
      "step": 4030
    },
    {
      "epoch": 1.1796897863623061,
      "grad_norm": 0.005932623986154795,
      "learning_rate": 2.0507755340942346e-05,
      "loss": 0.0,
      "step": 4031
    },
    {
      "epoch": 1.179982440737489,
      "grad_norm": 0.12450405210256577,
      "learning_rate": 2.0500438981562774e-05,
      "loss": 0.0008,
      "step": 4032
    },
    {
      "epoch": 1.180275095112672,
      "grad_norm": 0.0016493074363097548,
      "learning_rate": 2.0493122622183202e-05,
      "loss": 0.0,
      "step": 4033
    },
    {
      "epoch": 1.1805677494878548,
      "grad_norm": 0.00031817733542993665,
      "learning_rate": 2.048580626280363e-05,
      "loss": 0.0,
      "step": 4034
    },
    {
      "epoch": 1.1808604038630377,
      "grad_norm": 0.000735086970962584,
      "learning_rate": 2.0478489903424058e-05,
      "loss": 0.0,
      "step": 4035
    },
    {
      "epoch": 1.1811530582382206,
      "grad_norm": 0.00015424641605932266,
      "learning_rate": 2.0471173544044482e-05,
      "loss": 0.0,
      "step": 4036
    },
    {
      "epoch": 1.1814457126134035,
      "grad_norm": 0.00034844077890738845,
      "learning_rate": 2.046385718466491e-05,
      "loss": 0.0,
      "step": 4037
    },
    {
      "epoch": 1.1817383669885864,
      "grad_norm": 0.0006114047137089074,
      "learning_rate": 2.0456540825285338e-05,
      "loss": 0.0,
      "step": 4038
    },
    {
      "epoch": 1.1820310213637695,
      "grad_norm": 0.0004173670313321054,
      "learning_rate": 2.0449224465905766e-05,
      "loss": 0.0,
      "step": 4039
    },
    {
      "epoch": 1.1823236757389524,
      "grad_norm": 0.00021460339485201985,
      "learning_rate": 2.0441908106526194e-05,
      "loss": 0.0,
      "step": 4040
    },
    {
      "epoch": 1.1826163301141353,
      "grad_norm": 0.00044810911640524864,
      "learning_rate": 2.043459174714662e-05,
      "loss": 0.0,
      "step": 4041
    },
    {
      "epoch": 1.1829089844893181,
      "grad_norm": 0.000367204484064132,
      "learning_rate": 2.0427275387767047e-05,
      "loss": 0.0,
      "step": 4042
    },
    {
      "epoch": 1.183201638864501,
      "grad_norm": 0.0007425369694828987,
      "learning_rate": 2.0419959028387475e-05,
      "loss": 0.0,
      "step": 4043
    },
    {
      "epoch": 1.183494293239684,
      "grad_norm": 0.00026489770971238613,
      "learning_rate": 2.0412642669007903e-05,
      "loss": 0.0,
      "step": 4044
    },
    {
      "epoch": 1.1837869476148668,
      "grad_norm": 0.00024482072331011295,
      "learning_rate": 2.040532630962833e-05,
      "loss": 0.0,
      "step": 4045
    },
    {
      "epoch": 1.1840796019900497,
      "grad_norm": 0.00015763095871079713,
      "learning_rate": 2.0398009950248755e-05,
      "loss": 0.0,
      "step": 4046
    },
    {
      "epoch": 1.1843722563652326,
      "grad_norm": 0.0009888801723718643,
      "learning_rate": 2.0390693590869183e-05,
      "loss": 0.0,
      "step": 4047
    },
    {
      "epoch": 1.1846649107404157,
      "grad_norm": 0.0003547268861439079,
      "learning_rate": 2.038337723148961e-05,
      "loss": 0.0,
      "step": 4048
    },
    {
      "epoch": 1.1849575651155986,
      "grad_norm": 0.0004106337728444487,
      "learning_rate": 2.037606087211004e-05,
      "loss": 0.0,
      "step": 4049
    },
    {
      "epoch": 1.1852502194907815,
      "grad_norm": 0.0001579160598339513,
      "learning_rate": 2.0368744512730467e-05,
      "loss": 0.0,
      "step": 4050
    },
    {
      "epoch": 1.1855428738659644,
      "grad_norm": 0.00015295147022698075,
      "learning_rate": 2.0361428153350895e-05,
      "loss": 0.0,
      "step": 4051
    },
    {
      "epoch": 1.1858355282411472,
      "grad_norm": 0.0012576906010508537,
      "learning_rate": 2.035411179397132e-05,
      "loss": 0.0,
      "step": 4052
    },
    {
      "epoch": 1.1861281826163301,
      "grad_norm": 0.0005667202058248222,
      "learning_rate": 2.0346795434591747e-05,
      "loss": 0.0,
      "step": 4053
    },
    {
      "epoch": 1.186420836991513,
      "grad_norm": 0.00020606222096830606,
      "learning_rate": 2.0339479075212175e-05,
      "loss": 0.0,
      "step": 4054
    },
    {
      "epoch": 1.186713491366696,
      "grad_norm": 0.0003121673362329602,
      "learning_rate": 2.0332162715832603e-05,
      "loss": 0.0,
      "step": 4055
    },
    {
      "epoch": 1.1870061457418788,
      "grad_norm": 0.00022469591931439936,
      "learning_rate": 2.032484635645303e-05,
      "loss": 0.0,
      "step": 4056
    },
    {
      "epoch": 1.1872988001170617,
      "grad_norm": 0.0002625453344080597,
      "learning_rate": 2.0317529997073456e-05,
      "loss": 0.0,
      "step": 4057
    },
    {
      "epoch": 1.1875914544922446,
      "grad_norm": 0.00021747525897808373,
      "learning_rate": 2.0310213637693884e-05,
      "loss": 0.0,
      "step": 4058
    },
    {
      "epoch": 1.1878841088674275,
      "grad_norm": 0.00032584808650426567,
      "learning_rate": 2.030289727831431e-05,
      "loss": 0.0,
      "step": 4059
    },
    {
      "epoch": 1.1881767632426106,
      "grad_norm": 0.00024117686552926898,
      "learning_rate": 2.029558091893474e-05,
      "loss": 0.0,
      "step": 4060
    },
    {
      "epoch": 1.1884694176177935,
      "grad_norm": 0.00013844919158145785,
      "learning_rate": 2.0288264559555167e-05,
      "loss": 0.0,
      "step": 4061
    },
    {
      "epoch": 1.1887620719929763,
      "grad_norm": 0.00015658863412681967,
      "learning_rate": 2.0280948200175592e-05,
      "loss": 0.0,
      "step": 4062
    },
    {
      "epoch": 1.1890547263681592,
      "grad_norm": 0.00014539046969730407,
      "learning_rate": 2.027363184079602e-05,
      "loss": 0.0,
      "step": 4063
    },
    {
      "epoch": 1.1893473807433421,
      "grad_norm": 0.00018449235358275473,
      "learning_rate": 2.0266315481416448e-05,
      "loss": 0.0,
      "step": 4064
    },
    {
      "epoch": 1.189640035118525,
      "grad_norm": 0.00014005535922478884,
      "learning_rate": 2.0258999122036876e-05,
      "loss": 0.0,
      "step": 4065
    },
    {
      "epoch": 1.189932689493708,
      "grad_norm": 0.0002201669994974509,
      "learning_rate": 2.0251682762657304e-05,
      "loss": 0.0,
      "step": 4066
    },
    {
      "epoch": 1.1902253438688908,
      "grad_norm": 0.000424054975155741,
      "learning_rate": 2.024436640327773e-05,
      "loss": 0.0,
      "step": 4067
    },
    {
      "epoch": 1.1905179982440737,
      "grad_norm": 0.00021554843988269567,
      "learning_rate": 2.0237050043898156e-05,
      "loss": 0.0,
      "step": 4068
    },
    {
      "epoch": 1.1908106526192568,
      "grad_norm": 0.0001550370652694255,
      "learning_rate": 2.0229733684518584e-05,
      "loss": 0.0,
      "step": 4069
    },
    {
      "epoch": 1.1911033069944397,
      "grad_norm": 0.0001866599777713418,
      "learning_rate": 2.0222417325139012e-05,
      "loss": 0.0,
      "step": 4070
    },
    {
      "epoch": 1.1913959613696226,
      "grad_norm": 0.00020945954020135105,
      "learning_rate": 2.021510096575944e-05,
      "loss": 0.0,
      "step": 4071
    },
    {
      "epoch": 1.1916886157448054,
      "grad_norm": 0.0008195735281333327,
      "learning_rate": 2.0207784606379868e-05,
      "loss": 0.0,
      "step": 4072
    },
    {
      "epoch": 1.1919812701199883,
      "grad_norm": 0.00025099836057052016,
      "learning_rate": 2.0200468247000293e-05,
      "loss": 0.0,
      "step": 4073
    },
    {
      "epoch": 1.1922739244951712,
      "grad_norm": 0.0002605269255582243,
      "learning_rate": 2.019315188762072e-05,
      "loss": 0.0,
      "step": 4074
    },
    {
      "epoch": 1.192566578870354,
      "grad_norm": 0.0009943390032276511,
      "learning_rate": 2.018583552824115e-05,
      "loss": 0.0,
      "step": 4075
    },
    {
      "epoch": 1.192859233245537,
      "grad_norm": 0.0001504563115304336,
      "learning_rate": 2.0178519168861577e-05,
      "loss": 0.0,
      "step": 4076
    },
    {
      "epoch": 1.1931518876207199,
      "grad_norm": 0.001579423202201724,
      "learning_rate": 2.0171202809482005e-05,
      "loss": 0.0,
      "step": 4077
    },
    {
      "epoch": 1.1934445419959028,
      "grad_norm": 0.00023885307018645108,
      "learning_rate": 2.016388645010243e-05,
      "loss": 0.0,
      "step": 4078
    },
    {
      "epoch": 1.1937371963710857,
      "grad_norm": 0.0003890085208695382,
      "learning_rate": 2.0156570090722857e-05,
      "loss": 0.0,
      "step": 4079
    },
    {
      "epoch": 1.1940298507462686,
      "grad_norm": 0.00026691873790696263,
      "learning_rate": 2.0149253731343285e-05,
      "loss": 0.0,
      "step": 4080
    },
    {
      "epoch": 1.1943225051214517,
      "grad_norm": 0.00038226062315516174,
      "learning_rate": 2.0141937371963713e-05,
      "loss": 0.0,
      "step": 4081
    },
    {
      "epoch": 1.1946151594966345,
      "grad_norm": 0.00014924677088856697,
      "learning_rate": 2.013462101258414e-05,
      "loss": 0.0,
      "step": 4082
    },
    {
      "epoch": 1.1949078138718174,
      "grad_norm": 10.570392608642578,
      "learning_rate": 2.0127304653204565e-05,
      "loss": 0.0159,
      "step": 4083
    },
    {
      "epoch": 1.1952004682470003,
      "grad_norm": 0.0016134226461872458,
      "learning_rate": 2.0119988293824993e-05,
      "loss": 0.0,
      "step": 4084
    },
    {
      "epoch": 1.1954931226221832,
      "grad_norm": 0.00032581633422523737,
      "learning_rate": 2.011267193444542e-05,
      "loss": 0.0,
      "step": 4085
    },
    {
      "epoch": 1.195785776997366,
      "grad_norm": 0.00027731049340218306,
      "learning_rate": 2.010535557506585e-05,
      "loss": 0.0,
      "step": 4086
    },
    {
      "epoch": 1.196078431372549,
      "grad_norm": 0.00011565641761990264,
      "learning_rate": 2.0098039215686277e-05,
      "loss": 0.0,
      "step": 4087
    },
    {
      "epoch": 1.1963710857477319,
      "grad_norm": 9.082938194274902,
      "learning_rate": 2.0090722856306705e-05,
      "loss": 0.0128,
      "step": 4088
    },
    {
      "epoch": 1.1966637401229148,
      "grad_norm": 0.04108380898833275,
      "learning_rate": 2.008340649692713e-05,
      "loss": 0.0001,
      "step": 4089
    },
    {
      "epoch": 1.1969563944980977,
      "grad_norm": 0.0002807502169162035,
      "learning_rate": 2.0076090137547558e-05,
      "loss": 0.0,
      "step": 4090
    },
    {
      "epoch": 1.1972490488732808,
      "grad_norm": 0.00023227729252539575,
      "learning_rate": 2.0068773778167986e-05,
      "loss": 0.0,
      "step": 4091
    },
    {
      "epoch": 1.1975417032484637,
      "grad_norm": 0.00022215255012270063,
      "learning_rate": 2.0061457418788414e-05,
      "loss": 0.0,
      "step": 4092
    },
    {
      "epoch": 1.1978343576236465,
      "grad_norm": 0.0011499938555061817,
      "learning_rate": 2.005414105940884e-05,
      "loss": 0.0,
      "step": 4093
    },
    {
      "epoch": 1.1981270119988294,
      "grad_norm": 0.0001239770499523729,
      "learning_rate": 2.0046824700029266e-05,
      "loss": 0.0,
      "step": 4094
    },
    {
      "epoch": 1.1984196663740123,
      "grad_norm": 0.0002783769741654396,
      "learning_rate": 2.0039508340649694e-05,
      "loss": 0.0,
      "step": 4095
    },
    {
      "epoch": 1.1987123207491952,
      "grad_norm": 0.00011884696868946776,
      "learning_rate": 2.0032191981270122e-05,
      "loss": 0.0,
      "step": 4096
    },
    {
      "epoch": 1.199004975124378,
      "grad_norm": 0.00015855680976528674,
      "learning_rate": 2.002487562189055e-05,
      "loss": 0.0,
      "step": 4097
    },
    {
      "epoch": 1.199297629499561,
      "grad_norm": 0.0002209139202022925,
      "learning_rate": 2.0017559262510978e-05,
      "loss": 0.0,
      "step": 4098
    },
    {
      "epoch": 1.1995902838747439,
      "grad_norm": 0.0003696352359838784,
      "learning_rate": 2.0010242903131403e-05,
      "loss": 0.0,
      "step": 4099
    },
    {
      "epoch": 1.1998829382499268,
      "grad_norm": 0.006352592725306749,
      "learning_rate": 2.000292654375183e-05,
      "loss": 0.0,
      "step": 4100
    },
    {
      "epoch": 1.2001755926251096,
      "grad_norm": 0.00024181559274438769,
      "learning_rate": 1.999561018437226e-05,
      "loss": 0.0,
      "step": 4101
    },
    {
      "epoch": 1.2004682470002925,
      "grad_norm": 0.00061303615802899,
      "learning_rate": 1.9988293824992686e-05,
      "loss": 0.0,
      "step": 4102
    },
    {
      "epoch": 1.2007609013754756,
      "grad_norm": 0.0003650170692708343,
      "learning_rate": 1.9980977465613114e-05,
      "loss": 0.0,
      "step": 4103
    },
    {
      "epoch": 1.2010535557506585,
      "grad_norm": 0.0004829903191421181,
      "learning_rate": 1.997366110623354e-05,
      "loss": 0.0,
      "step": 4104
    },
    {
      "epoch": 1.2013462101258414,
      "grad_norm": 0.0010053931036964059,
      "learning_rate": 1.9966344746853967e-05,
      "loss": 0.0,
      "step": 4105
    },
    {
      "epoch": 1.2016388645010243,
      "grad_norm": 0.0004990804591216147,
      "learning_rate": 1.9959028387474395e-05,
      "loss": 0.0,
      "step": 4106
    },
    {
      "epoch": 1.2019315188762072,
      "grad_norm": 0.0003033496323041618,
      "learning_rate": 1.9951712028094823e-05,
      "loss": 0.0,
      "step": 4107
    },
    {
      "epoch": 1.20222417325139,
      "grad_norm": 0.0024242354556918144,
      "learning_rate": 1.9944395668715247e-05,
      "loss": 0.0,
      "step": 4108
    },
    {
      "epoch": 1.202516827626573,
      "grad_norm": 0.00014314088912215084,
      "learning_rate": 1.9937079309335675e-05,
      "loss": 0.0,
      "step": 4109
    },
    {
      "epoch": 1.2028094820017559,
      "grad_norm": 0.00014785472012590617,
      "learning_rate": 1.9929762949956103e-05,
      "loss": 0.0,
      "step": 4110
    },
    {
      "epoch": 1.2031021363769387,
      "grad_norm": 0.0008265096694231033,
      "learning_rate": 1.992244659057653e-05,
      "loss": 0.0,
      "step": 4111
    },
    {
      "epoch": 1.2033947907521219,
      "grad_norm": 0.00018385302973911166,
      "learning_rate": 1.9915130231196956e-05,
      "loss": 0.0,
      "step": 4112
    },
    {
      "epoch": 1.2036874451273047,
      "grad_norm": 0.00033309354330413043,
      "learning_rate": 1.9907813871817384e-05,
      "loss": 0.0,
      "step": 4113
    },
    {
      "epoch": 1.2039800995024876,
      "grad_norm": 0.00032117695081979036,
      "learning_rate": 1.990049751243781e-05,
      "loss": 0.0,
      "step": 4114
    },
    {
      "epoch": 1.2042727538776705,
      "grad_norm": 8.890005111694336,
      "learning_rate": 1.989318115305824e-05,
      "loss": 0.0187,
      "step": 4115
    },
    {
      "epoch": 1.2045654082528534,
      "grad_norm": 0.0002898283419199288,
      "learning_rate": 1.9885864793678664e-05,
      "loss": 0.0,
      "step": 4116
    },
    {
      "epoch": 1.2048580626280363,
      "grad_norm": 0.00043106195516884327,
      "learning_rate": 1.9878548434299092e-05,
      "loss": 0.0,
      "step": 4117
    },
    {
      "epoch": 1.2051507170032192,
      "grad_norm": 0.0008336814353242517,
      "learning_rate": 1.987123207491952e-05,
      "loss": 0.0,
      "step": 4118
    },
    {
      "epoch": 1.205443371378402,
      "grad_norm": 0.0005825763219036162,
      "learning_rate": 1.9863915715539948e-05,
      "loss": 0.0,
      "step": 4119
    },
    {
      "epoch": 1.205736025753585,
      "grad_norm": 0.0008535462548024952,
      "learning_rate": 1.9856599356160376e-05,
      "loss": 0.0,
      "step": 4120
    },
    {
      "epoch": 1.2060286801287678,
      "grad_norm": 0.000628861365839839,
      "learning_rate": 1.98492829967808e-05,
      "loss": 0.0,
      "step": 4121
    },
    {
      "epoch": 1.2063213345039507,
      "grad_norm": 0.0032100165262818336,
      "learning_rate": 1.984196663740123e-05,
      "loss": 0.0,
      "step": 4122
    },
    {
      "epoch": 1.2066139888791336,
      "grad_norm": 0.0007732409867458045,
      "learning_rate": 1.9834650278021656e-05,
      "loss": 0.0,
      "step": 4123
    },
    {
      "epoch": 1.2069066432543167,
      "grad_norm": 0.00039209527312777936,
      "learning_rate": 1.9827333918642084e-05,
      "loss": 0.0,
      "step": 4124
    },
    {
      "epoch": 1.2071992976294996,
      "grad_norm": 0.005325721111148596,
      "learning_rate": 1.9820017559262512e-05,
      "loss": 0.0,
      "step": 4125
    },
    {
      "epoch": 1.2074919520046825,
      "grad_norm": 0.00978061929345131,
      "learning_rate": 1.9812701199882937e-05,
      "loss": 0.0,
      "step": 4126
    },
    {
      "epoch": 1.2077846063798654,
      "grad_norm": 0.0028644222766160965,
      "learning_rate": 1.9805384840503365e-05,
      "loss": 0.0,
      "step": 4127
    },
    {
      "epoch": 1.2080772607550483,
      "grad_norm": 0.0008429356385022402,
      "learning_rate": 1.9798068481123793e-05,
      "loss": 0.0,
      "step": 4128
    },
    {
      "epoch": 1.2083699151302312,
      "grad_norm": 0.0014908623415976763,
      "learning_rate": 1.979075212174422e-05,
      "loss": 0.0,
      "step": 4129
    },
    {
      "epoch": 1.208662569505414,
      "grad_norm": 0.02699602022767067,
      "learning_rate": 1.978343576236465e-05,
      "loss": 0.0001,
      "step": 4130
    },
    {
      "epoch": 1.208955223880597,
      "grad_norm": 0.023643648251891136,
      "learning_rate": 1.9776119402985073e-05,
      "loss": 0.0001,
      "step": 4131
    },
    {
      "epoch": 1.2092478782557798,
      "grad_norm": 0.0020143059082329273,
      "learning_rate": 1.97688030436055e-05,
      "loss": 0.0,
      "step": 4132
    },
    {
      "epoch": 1.209540532630963,
      "grad_norm": 0.002474347362294793,
      "learning_rate": 1.976148668422593e-05,
      "loss": 0.0,
      "step": 4133
    },
    {
      "epoch": 1.2098331870061458,
      "grad_norm": 0.002414839807897806,
      "learning_rate": 1.9754170324846357e-05,
      "loss": 0.0,
      "step": 4134
    },
    {
      "epoch": 1.2101258413813287,
      "grad_norm": 0.0027450169436633587,
      "learning_rate": 1.9746853965466785e-05,
      "loss": 0.0,
      "step": 4135
    },
    {
      "epoch": 1.2104184957565116,
      "grad_norm": 0.0032009691931307316,
      "learning_rate": 1.973953760608721e-05,
      "loss": 0.0,
      "step": 4136
    },
    {
      "epoch": 1.2107111501316945,
      "grad_norm": 0.023865224793553352,
      "learning_rate": 1.9732221246707638e-05,
      "loss": 0.0001,
      "step": 4137
    },
    {
      "epoch": 1.2110038045068774,
      "grad_norm": 13.271195411682129,
      "learning_rate": 1.9724904887328065e-05,
      "loss": 0.0412,
      "step": 4138
    },
    {
      "epoch": 1.2112964588820603,
      "grad_norm": 0.0007114256150089204,
      "learning_rate": 1.9717588527948493e-05,
      "loss": 0.0,
      "step": 4139
    },
    {
      "epoch": 1.2115891132572432,
      "grad_norm": 0.010572691448032856,
      "learning_rate": 1.971027216856892e-05,
      "loss": 0.0,
      "step": 4140
    },
    {
      "epoch": 1.211881767632426,
      "grad_norm": 0.00038259930443018675,
      "learning_rate": 1.970295580918935e-05,
      "loss": 0.0,
      "step": 4141
    },
    {
      "epoch": 1.212174422007609,
      "grad_norm": 0.0001456064055673778,
      "learning_rate": 1.9695639449809774e-05,
      "loss": 0.0,
      "step": 4142
    },
    {
      "epoch": 1.2124670763827918,
      "grad_norm": 0.0004648877074941993,
      "learning_rate": 1.9688323090430202e-05,
      "loss": 0.0,
      "step": 4143
    },
    {
      "epoch": 1.2127597307579747,
      "grad_norm": 0.0002797534398268908,
      "learning_rate": 1.968100673105063e-05,
      "loss": 0.0,
      "step": 4144
    },
    {
      "epoch": 1.2130523851331578,
      "grad_norm": 0.001196891302242875,
      "learning_rate": 1.9673690371671058e-05,
      "loss": 0.0,
      "step": 4145
    },
    {
      "epoch": 1.2133450395083407,
      "grad_norm": 0.00014675207785330713,
      "learning_rate": 1.9666374012291486e-05,
      "loss": 0.0,
      "step": 4146
    },
    {
      "epoch": 1.2136376938835236,
      "grad_norm": 0.0002081404672935605,
      "learning_rate": 1.965905765291191e-05,
      "loss": 0.0,
      "step": 4147
    },
    {
      "epoch": 1.2139303482587065,
      "grad_norm": 0.0002005124551942572,
      "learning_rate": 1.9651741293532338e-05,
      "loss": 0.0,
      "step": 4148
    },
    {
      "epoch": 1.2142230026338894,
      "grad_norm": 0.0003484161861706525,
      "learning_rate": 1.9644424934152766e-05,
      "loss": 0.0,
      "step": 4149
    },
    {
      "epoch": 1.2145156570090723,
      "grad_norm": 0.0001250431960215792,
      "learning_rate": 1.9637108574773194e-05,
      "loss": 0.0,
      "step": 4150
    },
    {
      "epoch": 1.2148083113842552,
      "grad_norm": 0.00020586712344083935,
      "learning_rate": 1.9629792215393622e-05,
      "loss": 0.0,
      "step": 4151
    },
    {
      "epoch": 1.215100965759438,
      "grad_norm": 0.0001760764280334115,
      "learning_rate": 1.9622475856014047e-05,
      "loss": 0.0,
      "step": 4152
    },
    {
      "epoch": 1.215393620134621,
      "grad_norm": 0.0002378870121901855,
      "learning_rate": 1.9615159496634475e-05,
      "loss": 0.0,
      "step": 4153
    },
    {
      "epoch": 1.215686274509804,
      "grad_norm": 0.00010399192979093641,
      "learning_rate": 1.9607843137254903e-05,
      "loss": 0.0,
      "step": 4154
    },
    {
      "epoch": 1.215978928884987,
      "grad_norm": 0.0003964437055401504,
      "learning_rate": 1.960052677787533e-05,
      "loss": 0.0,
      "step": 4155
    },
    {
      "epoch": 1.2162715832601698,
      "grad_norm": 0.0001294235698878765,
      "learning_rate": 1.959321041849576e-05,
      "loss": 0.0,
      "step": 4156
    },
    {
      "epoch": 1.2165642376353527,
      "grad_norm": 0.00016335082182195038,
      "learning_rate": 1.9585894059116186e-05,
      "loss": 0.0,
      "step": 4157
    },
    {
      "epoch": 1.2168568920105356,
      "grad_norm": 0.0001436687307432294,
      "learning_rate": 1.957857769973661e-05,
      "loss": 0.0,
      "step": 4158
    },
    {
      "epoch": 1.2171495463857185,
      "grad_norm": 9.348212915938348e-05,
      "learning_rate": 1.957126134035704e-05,
      "loss": 0.0,
      "step": 4159
    },
    {
      "epoch": 1.2174422007609014,
      "grad_norm": 6.595941522391513e-05,
      "learning_rate": 1.9563944980977467e-05,
      "loss": 0.0,
      "step": 4160
    },
    {
      "epoch": 1.2177348551360843,
      "grad_norm": 6.128421955509111e-05,
      "learning_rate": 1.9556628621597895e-05,
      "loss": 0.0,
      "step": 4161
    },
    {
      "epoch": 1.2180275095112671,
      "grad_norm": 0.07660602033138275,
      "learning_rate": 1.9549312262218323e-05,
      "loss": 0.0001,
      "step": 4162
    },
    {
      "epoch": 1.21832016388645,
      "grad_norm": 0.0002029917377512902,
      "learning_rate": 1.9541995902838747e-05,
      "loss": 0.0,
      "step": 4163
    },
    {
      "epoch": 1.218612818261633,
      "grad_norm": 0.0019446692895144224,
      "learning_rate": 1.9534679543459175e-05,
      "loss": 0.0,
      "step": 4164
    },
    {
      "epoch": 1.2189054726368158,
      "grad_norm": 9.471770317759365e-05,
      "learning_rate": 1.9527363184079603e-05,
      "loss": 0.0,
      "step": 4165
    },
    {
      "epoch": 1.219198127011999,
      "grad_norm": 0.0022286963649094105,
      "learning_rate": 1.952004682470003e-05,
      "loss": 0.0,
      "step": 4166
    },
    {
      "epoch": 1.2194907813871818,
      "grad_norm": 0.00011821759107988328,
      "learning_rate": 1.951273046532046e-05,
      "loss": 0.0,
      "step": 4167
    },
    {
      "epoch": 1.2197834357623647,
      "grad_norm": 0.0004870724806096405,
      "learning_rate": 1.9505414105940884e-05,
      "loss": 0.0,
      "step": 4168
    },
    {
      "epoch": 1.2200760901375476,
      "grad_norm": 0.00013188557932153344,
      "learning_rate": 1.949809774656131e-05,
      "loss": 0.0,
      "step": 4169
    },
    {
      "epoch": 1.2203687445127305,
      "grad_norm": 0.0004395339055918157,
      "learning_rate": 1.949078138718174e-05,
      "loss": 0.0,
      "step": 4170
    },
    {
      "epoch": 1.2206613988879134,
      "grad_norm": 9.655337635194883e-05,
      "learning_rate": 1.9483465027802167e-05,
      "loss": 0.0,
      "step": 4171
    },
    {
      "epoch": 1.2209540532630963,
      "grad_norm": 0.00012214704474899918,
      "learning_rate": 1.9476148668422595e-05,
      "loss": 0.0,
      "step": 4172
    },
    {
      "epoch": 1.2212467076382791,
      "grad_norm": 0.0002902006381191313,
      "learning_rate": 1.9468832309043023e-05,
      "loss": 0.0,
      "step": 4173
    },
    {
      "epoch": 1.221539362013462,
      "grad_norm": 0.00023098342353478074,
      "learning_rate": 1.9461515949663448e-05,
      "loss": 0.0,
      "step": 4174
    },
    {
      "epoch": 1.221832016388645,
      "grad_norm": 4.296055316925049,
      "learning_rate": 1.9454199590283876e-05,
      "loss": 0.0029,
      "step": 4175
    },
    {
      "epoch": 1.222124670763828,
      "grad_norm": 0.00022539144265465438,
      "learning_rate": 1.9446883230904304e-05,
      "loss": 0.0,
      "step": 4176
    },
    {
      "epoch": 1.222417325139011,
      "grad_norm": 0.0034128634724766016,
      "learning_rate": 1.9439566871524732e-05,
      "loss": 0.0,
      "step": 4177
    },
    {
      "epoch": 1.2227099795141938,
      "grad_norm": 0.0001060848226188682,
      "learning_rate": 1.943225051214516e-05,
      "loss": 0.0,
      "step": 4178
    },
    {
      "epoch": 1.2230026338893767,
      "grad_norm": 0.0001370605459669605,
      "learning_rate": 1.9424934152765584e-05,
      "loss": 0.0,
      "step": 4179
    },
    {
      "epoch": 1.2232952882645596,
      "grad_norm": 0.0001054197273333557,
      "learning_rate": 1.9417617793386012e-05,
      "loss": 0.0,
      "step": 4180
    },
    {
      "epoch": 1.2235879426397425,
      "grad_norm": 0.00028701237170025706,
      "learning_rate": 1.941030143400644e-05,
      "loss": 0.0,
      "step": 4181
    },
    {
      "epoch": 1.2238805970149254,
      "grad_norm": 0.001215559197589755,
      "learning_rate": 1.9402985074626868e-05,
      "loss": 0.0,
      "step": 4182
    },
    {
      "epoch": 1.2241732513901082,
      "grad_norm": 0.00038847370888106525,
      "learning_rate": 1.9395668715247296e-05,
      "loss": 0.0,
      "step": 4183
    },
    {
      "epoch": 1.2244659057652911,
      "grad_norm": 0.00020916743960697204,
      "learning_rate": 1.938835235586772e-05,
      "loss": 0.0,
      "step": 4184
    },
    {
      "epoch": 1.224758560140474,
      "grad_norm": 0.00012006520410068333,
      "learning_rate": 1.938103599648815e-05,
      "loss": 0.0,
      "step": 4185
    },
    {
      "epoch": 1.225051214515657,
      "grad_norm": 0.00022883755445946008,
      "learning_rate": 1.9373719637108577e-05,
      "loss": 0.0,
      "step": 4186
    },
    {
      "epoch": 1.2253438688908398,
      "grad_norm": 0.0009294861229136586,
      "learning_rate": 1.9366403277729005e-05,
      "loss": 0.0,
      "step": 4187
    },
    {
      "epoch": 1.225636523266023,
      "grad_norm": 0.0002540037385188043,
      "learning_rate": 1.9359086918349432e-05,
      "loss": 0.0,
      "step": 4188
    },
    {
      "epoch": 1.2259291776412058,
      "grad_norm": 0.00010973669122904539,
      "learning_rate": 1.935177055896986e-05,
      "loss": 0.0,
      "step": 4189
    },
    {
      "epoch": 1.2262218320163887,
      "grad_norm": 0.00018544113845564425,
      "learning_rate": 1.9344454199590285e-05,
      "loss": 0.0,
      "step": 4190
    },
    {
      "epoch": 1.2265144863915716,
      "grad_norm": 0.0005819756188429892,
      "learning_rate": 1.9337137840210713e-05,
      "loss": 0.0,
      "step": 4191
    },
    {
      "epoch": 1.2268071407667545,
      "grad_norm": 6.846505857538432e-05,
      "learning_rate": 1.932982148083114e-05,
      "loss": 0.0,
      "step": 4192
    },
    {
      "epoch": 1.2270997951419373,
      "grad_norm": 0.0001275314571103081,
      "learning_rate": 1.932250512145157e-05,
      "loss": 0.0,
      "step": 4193
    },
    {
      "epoch": 1.2273924495171202,
      "grad_norm": 0.016942564398050308,
      "learning_rate": 1.9315188762071997e-05,
      "loss": 0.0001,
      "step": 4194
    },
    {
      "epoch": 1.2276851038923031,
      "grad_norm": 0.0003085004282183945,
      "learning_rate": 1.930787240269242e-05,
      "loss": 0.0,
      "step": 4195
    },
    {
      "epoch": 1.227977758267486,
      "grad_norm": 0.00022445937793236226,
      "learning_rate": 1.930055604331285e-05,
      "loss": 0.0,
      "step": 4196
    },
    {
      "epoch": 1.2282704126426691,
      "grad_norm": 0.0006460981676355004,
      "learning_rate": 1.9293239683933277e-05,
      "loss": 0.0,
      "step": 4197
    },
    {
      "epoch": 1.228563067017852,
      "grad_norm": 3.556636095046997,
      "learning_rate": 1.9285923324553705e-05,
      "loss": 0.1548,
      "step": 4198
    },
    {
      "epoch": 1.228855721393035,
      "grad_norm": 0.238409623503685,
      "learning_rate": 1.927860696517413e-05,
      "loss": 0.0009,
      "step": 4199
    },
    {
      "epoch": 1.2291483757682178,
      "grad_norm": 0.10258849710226059,
      "learning_rate": 1.9271290605794558e-05,
      "loss": 0.0003,
      "step": 4200
    },
    {
      "epoch": 1.2294410301434007,
      "grad_norm": 0.00675250543281436,
      "learning_rate": 1.9263974246414986e-05,
      "loss": 0.0,
      "step": 4201
    },
    {
      "epoch": 1.2297336845185836,
      "grad_norm": 0.0019149247091263533,
      "learning_rate": 1.9256657887035414e-05,
      "loss": 0.0,
      "step": 4202
    },
    {
      "epoch": 1.2300263388937664,
      "grad_norm": 0.008474493399262428,
      "learning_rate": 1.9249341527655838e-05,
      "loss": 0.0001,
      "step": 4203
    },
    {
      "epoch": 1.2303189932689493,
      "grad_norm": 0.000252736295806244,
      "learning_rate": 1.9242025168276266e-05,
      "loss": 0.0,
      "step": 4204
    },
    {
      "epoch": 1.2306116476441322,
      "grad_norm": 0.0008034301572479308,
      "learning_rate": 1.9234708808896694e-05,
      "loss": 0.0,
      "step": 4205
    },
    {
      "epoch": 1.230904302019315,
      "grad_norm": 0.001213454408571124,
      "learning_rate": 1.9227392449517122e-05,
      "loss": 0.0,
      "step": 4206
    },
    {
      "epoch": 1.231196956394498,
      "grad_norm": 0.004696414805948734,
      "learning_rate": 1.9220076090137547e-05,
      "loss": 0.0001,
      "step": 4207
    },
    {
      "epoch": 1.2314896107696809,
      "grad_norm": 0.0035157673992216587,
      "learning_rate": 1.9212759730757975e-05,
      "loss": 0.0,
      "step": 4208
    },
    {
      "epoch": 1.231782265144864,
      "grad_norm": 0.0005110283964313567,
      "learning_rate": 1.9205443371378402e-05,
      "loss": 0.0,
      "step": 4209
    },
    {
      "epoch": 1.2320749195200469,
      "grad_norm": 0.9316847324371338,
      "learning_rate": 1.919812701199883e-05,
      "loss": 0.0044,
      "step": 4210
    },
    {
      "epoch": 1.2323675738952298,
      "grad_norm": 0.2705109417438507,
      "learning_rate": 1.9190810652619255e-05,
      "loss": 0.0007,
      "step": 4211
    },
    {
      "epoch": 1.2326602282704127,
      "grad_norm": 0.03438115492463112,
      "learning_rate": 1.9183494293239683e-05,
      "loss": 0.0003,
      "step": 4212
    },
    {
      "epoch": 1.2329528826455955,
      "grad_norm": 0.02816217578947544,
      "learning_rate": 1.917617793386011e-05,
      "loss": 0.0003,
      "step": 4213
    },
    {
      "epoch": 1.2332455370207784,
      "grad_norm": 0.002822326961904764,
      "learning_rate": 1.916886157448054e-05,
      "loss": 0.0,
      "step": 4214
    },
    {
      "epoch": 1.2335381913959613,
      "grad_norm": 0.011674858629703522,
      "learning_rate": 1.9161545215100967e-05,
      "loss": 0.0001,
      "step": 4215
    },
    {
      "epoch": 1.2338308457711442,
      "grad_norm": 0.0035582296550273895,
      "learning_rate": 1.915422885572139e-05,
      "loss": 0.0,
      "step": 4216
    },
    {
      "epoch": 1.234123500146327,
      "grad_norm": 8.236109715653583e-05,
      "learning_rate": 1.914691249634182e-05,
      "loss": 0.0,
      "step": 4217
    },
    {
      "epoch": 1.2344161545215102,
      "grad_norm": 0.00041809817776083946,
      "learning_rate": 1.9139596136962247e-05,
      "loss": 0.0,
      "step": 4218
    },
    {
      "epoch": 1.234708808896693,
      "grad_norm": 0.0004134566697757691,
      "learning_rate": 1.9132279777582675e-05,
      "loss": 0.0,
      "step": 4219
    },
    {
      "epoch": 1.235001463271876,
      "grad_norm": 0.00023026966664474458,
      "learning_rate": 1.9124963418203103e-05,
      "loss": 0.0,
      "step": 4220
    },
    {
      "epoch": 1.2352941176470589,
      "grad_norm": 0.00014988753537181765,
      "learning_rate": 1.9117647058823528e-05,
      "loss": 0.0,
      "step": 4221
    },
    {
      "epoch": 1.2355867720222418,
      "grad_norm": 0.0008121030987240374,
      "learning_rate": 1.9110330699443956e-05,
      "loss": 0.0,
      "step": 4222
    },
    {
      "epoch": 1.2358794263974247,
      "grad_norm": 0.00311463326215744,
      "learning_rate": 1.9103014340064384e-05,
      "loss": 0.0,
      "step": 4223
    },
    {
      "epoch": 1.2361720807726075,
      "grad_norm": 0.0025886602234095335,
      "learning_rate": 1.909569798068481e-05,
      "loss": 0.0,
      "step": 4224
    },
    {
      "epoch": 1.2364647351477904,
      "grad_norm": 0.0012801195261999965,
      "learning_rate": 1.908838162130524e-05,
      "loss": 0.0,
      "step": 4225
    },
    {
      "epoch": 1.2367573895229733,
      "grad_norm": 7.799321610946208e-05,
      "learning_rate": 1.9081065261925667e-05,
      "loss": 0.0,
      "step": 4226
    },
    {
      "epoch": 1.2370500438981562,
      "grad_norm": 0.00016507963300682604,
      "learning_rate": 1.9073748902546092e-05,
      "loss": 0.0,
      "step": 4227
    },
    {
      "epoch": 1.237342698273339,
      "grad_norm": 0.0007038047770038247,
      "learning_rate": 1.906643254316652e-05,
      "loss": 0.0,
      "step": 4228
    },
    {
      "epoch": 1.237635352648522,
      "grad_norm": 0.00018707582785282284,
      "learning_rate": 1.9059116183786948e-05,
      "loss": 0.0,
      "step": 4229
    },
    {
      "epoch": 1.237928007023705,
      "grad_norm": 9.05595370568335e-05,
      "learning_rate": 1.9051799824407376e-05,
      "loss": 0.0,
      "step": 4230
    },
    {
      "epoch": 1.238220661398888,
      "grad_norm": 3.0786476135253906,
      "learning_rate": 1.9044483465027804e-05,
      "loss": 0.2078,
      "step": 4231
    },
    {
      "epoch": 1.2385133157740709,
      "grad_norm": 0.0010520414216443896,
      "learning_rate": 1.903716710564823e-05,
      "loss": 0.0,
      "step": 4232
    },
    {
      "epoch": 1.2388059701492538,
      "grad_norm": 0.00023917222279123962,
      "learning_rate": 1.9029850746268656e-05,
      "loss": 0.0,
      "step": 4233
    },
    {
      "epoch": 1.2390986245244366,
      "grad_norm": 0.0007023094221949577,
      "learning_rate": 1.9022534386889084e-05,
      "loss": 0.0,
      "step": 4234
    },
    {
      "epoch": 1.2393912788996195,
      "grad_norm": 0.0011123003205284476,
      "learning_rate": 1.9015218027509512e-05,
      "loss": 0.0,
      "step": 4235
    },
    {
      "epoch": 1.2396839332748024,
      "grad_norm": 0.00146926857996732,
      "learning_rate": 1.900790166812994e-05,
      "loss": 0.0,
      "step": 4236
    },
    {
      "epoch": 1.2399765876499853,
      "grad_norm": 0.0022972996812313795,
      "learning_rate": 1.9000585308750365e-05,
      "loss": 0.0,
      "step": 4237
    },
    {
      "epoch": 1.2402692420251682,
      "grad_norm": 0.040048111230134964,
      "learning_rate": 1.8993268949370793e-05,
      "loss": 0.0004,
      "step": 4238
    },
    {
      "epoch": 1.2405618964003513,
      "grad_norm": 0.019376909360289574,
      "learning_rate": 1.898595258999122e-05,
      "loss": 0.0002,
      "step": 4239
    },
    {
      "epoch": 1.2408545507755342,
      "grad_norm": 0.01507840771228075,
      "learning_rate": 1.897863623061165e-05,
      "loss": 0.0001,
      "step": 4240
    },
    {
      "epoch": 1.241147205150717,
      "grad_norm": 0.04815857484936714,
      "learning_rate": 1.8971319871232077e-05,
      "loss": 0.0005,
      "step": 4241
    },
    {
      "epoch": 1.2414398595259,
      "grad_norm": 0.12103767693042755,
      "learning_rate": 1.8964003511852505e-05,
      "loss": 0.0018,
      "step": 4242
    },
    {
      "epoch": 1.2417325139010829,
      "grad_norm": 0.0030685067176818848,
      "learning_rate": 1.895668715247293e-05,
      "loss": 0.0,
      "step": 4243
    },
    {
      "epoch": 1.2420251682762657,
      "grad_norm": 0.2151612788438797,
      "learning_rate": 1.8949370793093357e-05,
      "loss": 0.0029,
      "step": 4244
    },
    {
      "epoch": 1.2423178226514486,
      "grad_norm": 0.010484547354280949,
      "learning_rate": 1.8942054433713785e-05,
      "loss": 0.0001,
      "step": 4245
    },
    {
      "epoch": 1.2426104770266315,
      "grad_norm": 0.004427577834576368,
      "learning_rate": 1.8934738074334213e-05,
      "loss": 0.0001,
      "step": 4246
    },
    {
      "epoch": 1.2429031314018144,
      "grad_norm": 0.003969315905123949,
      "learning_rate": 1.892742171495464e-05,
      "loss": 0.0,
      "step": 4247
    },
    {
      "epoch": 1.2431957857769973,
      "grad_norm": 0.018904047086834908,
      "learning_rate": 1.8920105355575065e-05,
      "loss": 0.0002,
      "step": 4248
    },
    {
      "epoch": 1.2434884401521802,
      "grad_norm": 0.017943337559700012,
      "learning_rate": 1.8912788996195493e-05,
      "loss": 0.0002,
      "step": 4249
    },
    {
      "epoch": 1.243781094527363,
      "grad_norm": 0.004366525448858738,
      "learning_rate": 1.890547263681592e-05,
      "loss": 0.0,
      "step": 4250
    },
    {
      "epoch": 1.2440737489025462,
      "grad_norm": 0.19857020676136017,
      "learning_rate": 1.889815627743635e-05,
      "loss": 0.0011,
      "step": 4251
    },
    {
      "epoch": 1.244366403277729,
      "grad_norm": 0.0003746624570339918,
      "learning_rate": 1.8890839918056777e-05,
      "loss": 0.0,
      "step": 4252
    },
    {
      "epoch": 1.244659057652912,
      "grad_norm": 0.012967000715434551,
      "learning_rate": 1.8883523558677202e-05,
      "loss": 0.0001,
      "step": 4253
    },
    {
      "epoch": 1.2449517120280948,
      "grad_norm": 0.008523451164364815,
      "learning_rate": 1.887620719929763e-05,
      "loss": 0.0001,
      "step": 4254
    },
    {
      "epoch": 1.2452443664032777,
      "grad_norm": 0.0003721773100551218,
      "learning_rate": 1.8868890839918058e-05,
      "loss": 0.0,
      "step": 4255
    },
    {
      "epoch": 1.2455370207784606,
      "grad_norm": 0.0015894857933744788,
      "learning_rate": 1.8861574480538486e-05,
      "loss": 0.0,
      "step": 4256
    },
    {
      "epoch": 1.2458296751536435,
      "grad_norm": 0.0010224776342511177,
      "learning_rate": 1.8854258121158914e-05,
      "loss": 0.0,
      "step": 4257
    },
    {
      "epoch": 1.2461223295288264,
      "grad_norm": 0.014933550730347633,
      "learning_rate": 1.8846941761779338e-05,
      "loss": 0.0002,
      "step": 4258
    },
    {
      "epoch": 1.2464149839040093,
      "grad_norm": 0.010465152561664581,
      "learning_rate": 1.8839625402399766e-05,
      "loss": 0.0001,
      "step": 4259
    },
    {
      "epoch": 1.2467076382791924,
      "grad_norm": 0.00037264273851178586,
      "learning_rate": 1.8832309043020194e-05,
      "loss": 0.0,
      "step": 4260
    },
    {
      "epoch": 1.2470002926543753,
      "grad_norm": 0.00139771425165236,
      "learning_rate": 1.8824992683640622e-05,
      "loss": 0.0,
      "step": 4261
    },
    {
      "epoch": 1.2472929470295582,
      "grad_norm": 0.0018496651900932193,
      "learning_rate": 1.881767632426105e-05,
      "loss": 0.0,
      "step": 4262
    },
    {
      "epoch": 1.247585601404741,
      "grad_norm": 0.0002926725137513131,
      "learning_rate": 1.8810359964881478e-05,
      "loss": 0.0,
      "step": 4263
    },
    {
      "epoch": 1.247878255779924,
      "grad_norm": 0.0026824823580682278,
      "learning_rate": 1.8803043605501902e-05,
      "loss": 0.0,
      "step": 4264
    },
    {
      "epoch": 1.2481709101551068,
      "grad_norm": 0.00018589686078485101,
      "learning_rate": 1.879572724612233e-05,
      "loss": 0.0,
      "step": 4265
    },
    {
      "epoch": 1.2484635645302897,
      "grad_norm": 0.00043787300819531083,
      "learning_rate": 1.878841088674276e-05,
      "loss": 0.0,
      "step": 4266
    },
    {
      "epoch": 1.2487562189054726,
      "grad_norm": 0.00044568435987457633,
      "learning_rate": 1.8781094527363186e-05,
      "loss": 0.0,
      "step": 4267
    },
    {
      "epoch": 1.2490488732806555,
      "grad_norm": 0.0006209853454492986,
      "learning_rate": 1.8773778167983614e-05,
      "loss": 0.0,
      "step": 4268
    },
    {
      "epoch": 1.2493415276558384,
      "grad_norm": 0.006390134803950787,
      "learning_rate": 1.876646180860404e-05,
      "loss": 0.0,
      "step": 4269
    },
    {
      "epoch": 1.2496341820310213,
      "grad_norm": 0.00033496366813778877,
      "learning_rate": 1.8759145449224467e-05,
      "loss": 0.0,
      "step": 4270
    },
    {
      "epoch": 1.2499268364062042,
      "grad_norm": 0.005502917338162661,
      "learning_rate": 1.8751829089844895e-05,
      "loss": 0.0,
      "step": 4271
    },
    {
      "epoch": 1.250219490781387,
      "grad_norm": 0.00027335871709510684,
      "learning_rate": 1.8744512730465323e-05,
      "loss": 0.0,
      "step": 4272
    },
    {
      "epoch": 1.2505121451565702,
      "grad_norm": 0.00015072182577569038,
      "learning_rate": 1.873719637108575e-05,
      "loss": 0.0,
      "step": 4273
    },
    {
      "epoch": 1.250804799531753,
      "grad_norm": 0.0002814159670379013,
      "learning_rate": 1.8729880011706175e-05,
      "loss": 0.0,
      "step": 4274
    },
    {
      "epoch": 1.251097453906936,
      "grad_norm": 0.0004379993479233235,
      "learning_rate": 1.8722563652326603e-05,
      "loss": 0.0,
      "step": 4275
    },
    {
      "epoch": 1.2513901082821188,
      "grad_norm": 0.003155686892569065,
      "learning_rate": 1.871524729294703e-05,
      "loss": 0.0,
      "step": 4276
    },
    {
      "epoch": 1.2516827626573017,
      "grad_norm": 0.00023797780158929527,
      "learning_rate": 1.870793093356746e-05,
      "loss": 0.0,
      "step": 4277
    },
    {
      "epoch": 1.2519754170324846,
      "grad_norm": 0.040031664073467255,
      "learning_rate": 1.8700614574187887e-05,
      "loss": 0.0001,
      "step": 4278
    },
    {
      "epoch": 1.2522680714076675,
      "grad_norm": 0.0011026357533410192,
      "learning_rate": 1.8693298214808315e-05,
      "loss": 0.0,
      "step": 4279
    },
    {
      "epoch": 1.2525607257828504,
      "grad_norm": 0.006923212204128504,
      "learning_rate": 1.868598185542874e-05,
      "loss": 0.0001,
      "step": 4280
    },
    {
      "epoch": 1.2528533801580335,
      "grad_norm": 0.00014879163063596934,
      "learning_rate": 1.8678665496049167e-05,
      "loss": 0.0,
      "step": 4281
    },
    {
      "epoch": 1.2531460345332164,
      "grad_norm": 0.0005386594566516578,
      "learning_rate": 1.8671349136669595e-05,
      "loss": 0.0,
      "step": 4282
    },
    {
      "epoch": 1.2534386889083993,
      "grad_norm": 0.011392862536013126,
      "learning_rate": 1.8664032777290023e-05,
      "loss": 0.0001,
      "step": 4283
    },
    {
      "epoch": 1.2537313432835822,
      "grad_norm": 0.00036759424256160855,
      "learning_rate": 1.865671641791045e-05,
      "loss": 0.0,
      "step": 4284
    },
    {
      "epoch": 1.254023997658765,
      "grad_norm": 0.06291080266237259,
      "learning_rate": 1.8649400058530876e-05,
      "loss": 0.0008,
      "step": 4285
    },
    {
      "epoch": 1.254316652033948,
      "grad_norm": 0.00039730401476845145,
      "learning_rate": 1.8642083699151304e-05,
      "loss": 0.0,
      "step": 4286
    },
    {
      "epoch": 1.2546093064091308,
      "grad_norm": 0.00015699613140895963,
      "learning_rate": 1.8634767339771732e-05,
      "loss": 0.0,
      "step": 4287
    },
    {
      "epoch": 1.2549019607843137,
      "grad_norm": 39.24689865112305,
      "learning_rate": 1.862745098039216e-05,
      "loss": 0.0722,
      "step": 4288
    },
    {
      "epoch": 1.2551946151594966,
      "grad_norm": 0.0009274079347960651,
      "learning_rate": 1.8620134621012584e-05,
      "loss": 0.0,
      "step": 4289
    },
    {
      "epoch": 1.2554872695346795,
      "grad_norm": 0.00017085866420529783,
      "learning_rate": 1.8612818261633012e-05,
      "loss": 0.0,
      "step": 4290
    },
    {
      "epoch": 1.2557799239098624,
      "grad_norm": 0.00016269812476821244,
      "learning_rate": 1.860550190225344e-05,
      "loss": 0.0,
      "step": 4291
    },
    {
      "epoch": 1.2560725782850453,
      "grad_norm": 0.002036924008280039,
      "learning_rate": 1.8598185542873868e-05,
      "loss": 0.0,
      "step": 4292
    },
    {
      "epoch": 1.2563652326602281,
      "grad_norm": 0.003428952069953084,
      "learning_rate": 1.8590869183494293e-05,
      "loss": 0.0,
      "step": 4293
    },
    {
      "epoch": 1.2566578870354113,
      "grad_norm": 0.04832572862505913,
      "learning_rate": 1.858355282411472e-05,
      "loss": 0.0006,
      "step": 4294
    },
    {
      "epoch": 1.2569505414105941,
      "grad_norm": 0.04637840390205383,
      "learning_rate": 1.857623646473515e-05,
      "loss": 0.0006,
      "step": 4295
    },
    {
      "epoch": 1.257243195785777,
      "grad_norm": 0.00366193731315434,
      "learning_rate": 1.8568920105355577e-05,
      "loss": 0.0,
      "step": 4296
    },
    {
      "epoch": 1.25753585016096,
      "grad_norm": 0.00028218806255608797,
      "learning_rate": 1.8561603745976e-05,
      "loss": 0.0,
      "step": 4297
    },
    {
      "epoch": 1.2578285045361428,
      "grad_norm": 0.00024327139544766396,
      "learning_rate": 1.855428738659643e-05,
      "loss": 0.0,
      "step": 4298
    },
    {
      "epoch": 1.2581211589113257,
      "grad_norm": 0.0015721708768978715,
      "learning_rate": 1.8546971027216857e-05,
      "loss": 0.0,
      "step": 4299
    },
    {
      "epoch": 1.2584138132865086,
      "grad_norm": 0.0009468572679907084,
      "learning_rate": 1.8539654667837285e-05,
      "loss": 0.0,
      "step": 4300
    },
    {
      "epoch": 1.2587064676616915,
      "grad_norm": 0.31393736600875854,
      "learning_rate": 1.853233830845771e-05,
      "loss": 0.0007,
      "step": 4301
    },
    {
      "epoch": 1.2589991220368746,
      "grad_norm": 0.0014417548663914204,
      "learning_rate": 1.8525021949078137e-05,
      "loss": 0.0,
      "step": 4302
    },
    {
      "epoch": 1.2592917764120575,
      "grad_norm": 0.0002797488123178482,
      "learning_rate": 1.8517705589698565e-05,
      "loss": 0.0,
      "step": 4303
    },
    {
      "epoch": 1.2595844307872404,
      "grad_norm": 0.00036022544372826815,
      "learning_rate": 1.8510389230318993e-05,
      "loss": 0.0,
      "step": 4304
    },
    {
      "epoch": 1.2598770851624232,
      "grad_norm": 0.004282816778868437,
      "learning_rate": 1.850307287093942e-05,
      "loss": 0.0,
      "step": 4305
    },
    {
      "epoch": 1.2601697395376061,
      "grad_norm": 0.011342636309564114,
      "learning_rate": 1.8495756511559846e-05,
      "loss": 0.0001,
      "step": 4306
    },
    {
      "epoch": 1.260462393912789,
      "grad_norm": 0.001547664636746049,
      "learning_rate": 1.8488440152180274e-05,
      "loss": 0.0,
      "step": 4307
    },
    {
      "epoch": 1.260755048287972,
      "grad_norm": 0.0006469974759966135,
      "learning_rate": 1.8481123792800702e-05,
      "loss": 0.0,
      "step": 4308
    },
    {
      "epoch": 1.2610477026631548,
      "grad_norm": 0.0004033749573864043,
      "learning_rate": 1.847380743342113e-05,
      "loss": 0.0,
      "step": 4309
    },
    {
      "epoch": 1.2613403570383377,
      "grad_norm": 0.0019031958654522896,
      "learning_rate": 1.8466491074041558e-05,
      "loss": 0.0,
      "step": 4310
    },
    {
      "epoch": 1.2616330114135206,
      "grad_norm": 0.0014144045999273658,
      "learning_rate": 1.8459174714661986e-05,
      "loss": 0.0,
      "step": 4311
    },
    {
      "epoch": 1.2619256657887035,
      "grad_norm": 0.007924694567918777,
      "learning_rate": 1.845185835528241e-05,
      "loss": 0.0001,
      "step": 4312
    },
    {
      "epoch": 1.2622183201638864,
      "grad_norm": 0.006693069823086262,
      "learning_rate": 1.8444541995902838e-05,
      "loss": 0.0001,
      "step": 4313
    },
    {
      "epoch": 1.2625109745390692,
      "grad_norm": 0.00043680029921233654,
      "learning_rate": 1.8437225636523266e-05,
      "loss": 0.0,
      "step": 4314
    },
    {
      "epoch": 1.2628036289142521,
      "grad_norm": 0.0006893217214383185,
      "learning_rate": 1.8429909277143694e-05,
      "loss": 0.0,
      "step": 4315
    },
    {
      "epoch": 1.2630962832894352,
      "grad_norm": 0.0004322109743952751,
      "learning_rate": 1.8422592917764122e-05,
      "loss": 0.0,
      "step": 4316
    },
    {
      "epoch": 1.2633889376646181,
      "grad_norm": 0.00439478037878871,
      "learning_rate": 1.8415276558384547e-05,
      "loss": 0.0001,
      "step": 4317
    },
    {
      "epoch": 1.263681592039801,
      "grad_norm": 0.0003082916955463588,
      "learning_rate": 1.8407960199004975e-05,
      "loss": 0.0,
      "step": 4318
    },
    {
      "epoch": 1.263974246414984,
      "grad_norm": 0.001095048850402236,
      "learning_rate": 1.8400643839625402e-05,
      "loss": 0.0,
      "step": 4319
    },
    {
      "epoch": 1.2642669007901668,
      "grad_norm": 0.0002884860441554338,
      "learning_rate": 1.839332748024583e-05,
      "loss": 0.0,
      "step": 4320
    },
    {
      "epoch": 1.2645595551653497,
      "grad_norm": 0.0017246073111891747,
      "learning_rate": 1.838601112086626e-05,
      "loss": 0.0,
      "step": 4321
    },
    {
      "epoch": 1.2648522095405326,
      "grad_norm": 0.00025704840663820505,
      "learning_rate": 1.8378694761486683e-05,
      "loss": 0.0,
      "step": 4322
    },
    {
      "epoch": 1.2651448639157157,
      "grad_norm": 0.0005988952470943332,
      "learning_rate": 1.837137840210711e-05,
      "loss": 0.0,
      "step": 4323
    },
    {
      "epoch": 1.2654375182908986,
      "grad_norm": 0.0017043894622474909,
      "learning_rate": 1.836406204272754e-05,
      "loss": 0.0,
      "step": 4324
    },
    {
      "epoch": 1.2657301726660815,
      "grad_norm": 0.0003185214300174266,
      "learning_rate": 1.8356745683347967e-05,
      "loss": 0.0,
      "step": 4325
    },
    {
      "epoch": 1.2660228270412643,
      "grad_norm": 0.0003728196315933019,
      "learning_rate": 1.8349429323968395e-05,
      "loss": 0.0,
      "step": 4326
    },
    {
      "epoch": 1.2663154814164472,
      "grad_norm": 0.002830952638760209,
      "learning_rate": 1.834211296458882e-05,
      "loss": 0.0,
      "step": 4327
    },
    {
      "epoch": 1.2666081357916301,
      "grad_norm": 0.00018264648679178208,
      "learning_rate": 1.8334796605209247e-05,
      "loss": 0.0,
      "step": 4328
    },
    {
      "epoch": 1.266900790166813,
      "grad_norm": 0.00030146719655022025,
      "learning_rate": 1.8327480245829675e-05,
      "loss": 0.0,
      "step": 4329
    },
    {
      "epoch": 1.267193444541996,
      "grad_norm": 0.0001685014140093699,
      "learning_rate": 1.8320163886450103e-05,
      "loss": 0.0,
      "step": 4330
    },
    {
      "epoch": 1.2674860989171788,
      "grad_norm": 0.0008038796368055046,
      "learning_rate": 1.831284752707053e-05,
      "loss": 0.0,
      "step": 4331
    },
    {
      "epoch": 1.2677787532923617,
      "grad_norm": 0.0002496697998140007,
      "learning_rate": 1.830553116769096e-05,
      "loss": 0.0,
      "step": 4332
    },
    {
      "epoch": 1.2680714076675446,
      "grad_norm": 0.0002453435445204377,
      "learning_rate": 1.8298214808311384e-05,
      "loss": 0.0,
      "step": 4333
    },
    {
      "epoch": 1.2683640620427274,
      "grad_norm": 0.00045765130198560655,
      "learning_rate": 1.829089844893181e-05,
      "loss": 0.0,
      "step": 4334
    },
    {
      "epoch": 1.2686567164179103,
      "grad_norm": 0.001018995069898665,
      "learning_rate": 1.828358208955224e-05,
      "loss": 0.0,
      "step": 4335
    },
    {
      "epoch": 1.2689493707930932,
      "grad_norm": 0.00016092466830741614,
      "learning_rate": 1.8276265730172667e-05,
      "loss": 0.0,
      "step": 4336
    },
    {
      "epoch": 1.2692420251682763,
      "grad_norm": 0.00014827775885351002,
      "learning_rate": 1.8268949370793095e-05,
      "loss": 0.0,
      "step": 4337
    },
    {
      "epoch": 1.2695346795434592,
      "grad_norm": 0.0003899074799846858,
      "learning_rate": 1.826163301141352e-05,
      "loss": 0.0,
      "step": 4338
    },
    {
      "epoch": 1.269827333918642,
      "grad_norm": 0.00010926521645160392,
      "learning_rate": 1.8254316652033948e-05,
      "loss": 0.0,
      "step": 4339
    },
    {
      "epoch": 1.270119988293825,
      "grad_norm": 0.0010218808893114328,
      "learning_rate": 1.8247000292654376e-05,
      "loss": 0.0,
      "step": 4340
    },
    {
      "epoch": 1.2704126426690079,
      "grad_norm": 0.0007877651951275766,
      "learning_rate": 1.8239683933274804e-05,
      "loss": 0.0,
      "step": 4341
    },
    {
      "epoch": 1.2707052970441908,
      "grad_norm": 0.00018021459982264787,
      "learning_rate": 1.8232367573895232e-05,
      "loss": 0.0,
      "step": 4342
    },
    {
      "epoch": 1.2709979514193737,
      "grad_norm": 0.0012451495276764035,
      "learning_rate": 1.8225051214515656e-05,
      "loss": 0.0,
      "step": 4343
    },
    {
      "epoch": 1.2712906057945565,
      "grad_norm": 0.0005864020204171538,
      "learning_rate": 1.8217734855136084e-05,
      "loss": 0.0,
      "step": 4344
    },
    {
      "epoch": 1.2715832601697397,
      "grad_norm": 0.00013048344408161938,
      "learning_rate": 1.8210418495756512e-05,
      "loss": 0.0,
      "step": 4345
    },
    {
      "epoch": 1.2718759145449225,
      "grad_norm": 0.0015791425248607993,
      "learning_rate": 1.820310213637694e-05,
      "loss": 0.0,
      "step": 4346
    },
    {
      "epoch": 1.2721685689201054,
      "grad_norm": 0.00019306503236293793,
      "learning_rate": 1.8195785776997368e-05,
      "loss": 0.0,
      "step": 4347
    },
    {
      "epoch": 1.2724612232952883,
      "grad_norm": 0.00011835879558930174,
      "learning_rate": 1.8188469417617796e-05,
      "loss": 0.0,
      "step": 4348
    },
    {
      "epoch": 1.2727538776704712,
      "grad_norm": 0.0002314967568963766,
      "learning_rate": 1.818115305823822e-05,
      "loss": 0.0,
      "step": 4349
    },
    {
      "epoch": 1.273046532045654,
      "grad_norm": 0.00022952862491365522,
      "learning_rate": 1.817383669885865e-05,
      "loss": 0.0,
      "step": 4350
    },
    {
      "epoch": 1.273339186420837,
      "grad_norm": 2.1702842712402344,
      "learning_rate": 1.8166520339479077e-05,
      "loss": 0.0146,
      "step": 4351
    },
    {
      "epoch": 1.2736318407960199,
      "grad_norm": 0.0002519426343496889,
      "learning_rate": 1.8159203980099505e-05,
      "loss": 0.0,
      "step": 4352
    },
    {
      "epoch": 1.2739244951712028,
      "grad_norm": 0.00180053838994354,
      "learning_rate": 1.8151887620719932e-05,
      "loss": 0.0,
      "step": 4353
    },
    {
      "epoch": 1.2742171495463857,
      "grad_norm": 0.0031657288782298565,
      "learning_rate": 1.8144571261340357e-05,
      "loss": 0.0,
      "step": 4354
    },
    {
      "epoch": 1.2745098039215685,
      "grad_norm": 0.00036263116635382175,
      "learning_rate": 1.8137254901960785e-05,
      "loss": 0.0,
      "step": 4355
    },
    {
      "epoch": 1.2748024582967514,
      "grad_norm": 0.00032865680987015367,
      "learning_rate": 1.8129938542581213e-05,
      "loss": 0.0,
      "step": 4356
    },
    {
      "epoch": 1.2750951126719343,
      "grad_norm": 0.0009084104094654322,
      "learning_rate": 1.812262218320164e-05,
      "loss": 0.0,
      "step": 4357
    },
    {
      "epoch": 1.2753877670471174,
      "grad_norm": 8.174698829650879,
      "learning_rate": 1.811530582382207e-05,
      "loss": 0.2658,
      "step": 4358
    },
    {
      "epoch": 1.2756804214223003,
      "grad_norm": 0.0010587909491732717,
      "learning_rate": 1.8107989464442493e-05,
      "loss": 0.0,
      "step": 4359
    },
    {
      "epoch": 1.2759730757974832,
      "grad_norm": 0.0005852478789165616,
      "learning_rate": 1.810067310506292e-05,
      "loss": 0.0,
      "step": 4360
    },
    {
      "epoch": 1.276265730172666,
      "grad_norm": 0.0003181801876053214,
      "learning_rate": 1.809335674568335e-05,
      "loss": 0.0,
      "step": 4361
    },
    {
      "epoch": 1.276558384547849,
      "grad_norm": 0.00035449196002446115,
      "learning_rate": 1.8086040386303777e-05,
      "loss": 0.0,
      "step": 4362
    },
    {
      "epoch": 1.2768510389230319,
      "grad_norm": 0.06469757109880447,
      "learning_rate": 1.8078724026924205e-05,
      "loss": 0.0004,
      "step": 4363
    },
    {
      "epoch": 1.2771436932982148,
      "grad_norm": 0.000493619532790035,
      "learning_rate": 1.8071407667544633e-05,
      "loss": 0.0,
      "step": 4364
    },
    {
      "epoch": 1.2774363476733976,
      "grad_norm": 0.0003512586699798703,
      "learning_rate": 1.8064091308165058e-05,
      "loss": 0.0,
      "step": 4365
    },
    {
      "epoch": 1.2777290020485808,
      "grad_norm": 0.0003507339279167354,
      "learning_rate": 1.8056774948785486e-05,
      "loss": 0.0,
      "step": 4366
    },
    {
      "epoch": 1.2780216564237636,
      "grad_norm": 0.00018073168757837266,
      "learning_rate": 1.8049458589405914e-05,
      "loss": 0.0,
      "step": 4367
    },
    {
      "epoch": 1.2783143107989465,
      "grad_norm": 0.00024586240760982037,
      "learning_rate": 1.804214223002634e-05,
      "loss": 0.0,
      "step": 4368
    },
    {
      "epoch": 1.2786069651741294,
      "grad_norm": 0.0006003909511491656,
      "learning_rate": 1.803482587064677e-05,
      "loss": 0.0,
      "step": 4369
    },
    {
      "epoch": 1.2788996195493123,
      "grad_norm": 0.00019972145673818886,
      "learning_rate": 1.8027509511267194e-05,
      "loss": 0.0,
      "step": 4370
    },
    {
      "epoch": 1.2791922739244952,
      "grad_norm": 0.0001580975076649338,
      "learning_rate": 1.8020193151887622e-05,
      "loss": 0.0,
      "step": 4371
    },
    {
      "epoch": 1.279484928299678,
      "grad_norm": 0.00012921317829750478,
      "learning_rate": 1.801287679250805e-05,
      "loss": 0.0,
      "step": 4372
    },
    {
      "epoch": 1.279777582674861,
      "grad_norm": 0.0006235188920982182,
      "learning_rate": 1.8005560433128478e-05,
      "loss": 0.0,
      "step": 4373
    },
    {
      "epoch": 1.2800702370500439,
      "grad_norm": 0.00019048931426368654,
      "learning_rate": 1.7998244073748906e-05,
      "loss": 0.0,
      "step": 4374
    },
    {
      "epoch": 1.2803628914252267,
      "grad_norm": 9.730968304211274e-05,
      "learning_rate": 1.799092771436933e-05,
      "loss": 0.0,
      "step": 4375
    },
    {
      "epoch": 1.2806555458004096,
      "grad_norm": 0.00012557368609122932,
      "learning_rate": 1.798361135498976e-05,
      "loss": 0.0,
      "step": 4376
    },
    {
      "epoch": 1.2809482001755925,
      "grad_norm": 0.003751950105652213,
      "learning_rate": 1.7976294995610186e-05,
      "loss": 0.0,
      "step": 4377
    },
    {
      "epoch": 1.2812408545507754,
      "grad_norm": 0.00039506788016296923,
      "learning_rate": 1.7968978636230614e-05,
      "loss": 0.0,
      "step": 4378
    },
    {
      "epoch": 1.2815335089259585,
      "grad_norm": 0.00354623980820179,
      "learning_rate": 1.7961662276851042e-05,
      "loss": 0.0,
      "step": 4379
    },
    {
      "epoch": 1.2818261633011414,
      "grad_norm": 0.0002384902909398079,
      "learning_rate": 1.7954345917471467e-05,
      "loss": 0.0,
      "step": 4380
    },
    {
      "epoch": 1.2821188176763243,
      "grad_norm": 0.0029882891103625298,
      "learning_rate": 1.7947029558091895e-05,
      "loss": 0.0,
      "step": 4381
    },
    {
      "epoch": 1.2824114720515072,
      "grad_norm": 0.0005152951343916357,
      "learning_rate": 1.7939713198712323e-05,
      "loss": 0.0,
      "step": 4382
    },
    {
      "epoch": 1.28270412642669,
      "grad_norm": 0.03205489739775658,
      "learning_rate": 1.793239683933275e-05,
      "loss": 0.0004,
      "step": 4383
    },
    {
      "epoch": 1.282996780801873,
      "grad_norm": 0.006373089738190174,
      "learning_rate": 1.7925080479953175e-05,
      "loss": 0.0,
      "step": 4384
    },
    {
      "epoch": 1.2832894351770558,
      "grad_norm": 0.0009934831177815795,
      "learning_rate": 1.7917764120573603e-05,
      "loss": 0.0,
      "step": 4385
    },
    {
      "epoch": 1.2835820895522387,
      "grad_norm": 0.00041518089710734785,
      "learning_rate": 1.791044776119403e-05,
      "loss": 0.0,
      "step": 4386
    },
    {
      "epoch": 1.2838747439274218,
      "grad_norm": 0.00018887739861384034,
      "learning_rate": 1.790313140181446e-05,
      "loss": 0.0,
      "step": 4387
    },
    {
      "epoch": 1.2841673983026047,
      "grad_norm": 0.00019374901603441685,
      "learning_rate": 1.7895815042434884e-05,
      "loss": 0.0,
      "step": 4388
    },
    {
      "epoch": 1.2844600526777876,
      "grad_norm": 0.0006210590363480151,
      "learning_rate": 1.788849868305531e-05,
      "loss": 0.0,
      "step": 4389
    },
    {
      "epoch": 1.2847527070529705,
      "grad_norm": 0.00012727176363114268,
      "learning_rate": 1.788118232367574e-05,
      "loss": 0.0,
      "step": 4390
    },
    {
      "epoch": 1.2850453614281534,
      "grad_norm": 0.003193584969267249,
      "learning_rate": 1.7873865964296167e-05,
      "loss": 0.0,
      "step": 4391
    },
    {
      "epoch": 1.2853380158033363,
      "grad_norm": 0.00022801656450610608,
      "learning_rate": 1.7866549604916592e-05,
      "loss": 0.0,
      "step": 4392
    },
    {
      "epoch": 1.2856306701785192,
      "grad_norm": 0.004470504354685545,
      "learning_rate": 1.785923324553702e-05,
      "loss": 0.0,
      "step": 4393
    },
    {
      "epoch": 1.285923324553702,
      "grad_norm": 0.00029001777875237167,
      "learning_rate": 1.7851916886157448e-05,
      "loss": 0.0,
      "step": 4394
    },
    {
      "epoch": 1.286215978928885,
      "grad_norm": 0.0005296553717926145,
      "learning_rate": 1.7844600526777876e-05,
      "loss": 0.0,
      "step": 4395
    },
    {
      "epoch": 1.2865086333040678,
      "grad_norm": 6.382421997841448e-05,
      "learning_rate": 1.78372841673983e-05,
      "loss": 0.0,
      "step": 4396
    },
    {
      "epoch": 1.2868012876792507,
      "grad_norm": 0.0020240354351699352,
      "learning_rate": 1.782996780801873e-05,
      "loss": 0.0,
      "step": 4397
    },
    {
      "epoch": 1.2870939420544336,
      "grad_norm": 0.003384276293218136,
      "learning_rate": 1.7822651448639156e-05,
      "loss": 0.0,
      "step": 4398
    },
    {
      "epoch": 1.2873865964296165,
      "grad_norm": 0.04222164303064346,
      "learning_rate": 1.7815335089259584e-05,
      "loss": 0.0002,
      "step": 4399
    },
    {
      "epoch": 1.2876792508047996,
      "grad_norm": 1.5526560544967651,
      "learning_rate": 1.7808018729880012e-05,
      "loss": 0.0169,
      "step": 4400
    },
    {
      "epoch": 1.2879719051799825,
      "grad_norm": 0.00047081487718969584,
      "learning_rate": 1.780070237050044e-05,
      "loss": 0.0,
      "step": 4401
    },
    {
      "epoch": 1.2882645595551654,
      "grad_norm": 0.0005067844176664948,
      "learning_rate": 1.7793386011120865e-05,
      "loss": 0.0,
      "step": 4402
    },
    {
      "epoch": 1.2885572139303483,
      "grad_norm": 0.00012542780314106494,
      "learning_rate": 1.7786069651741293e-05,
      "loss": 0.0,
      "step": 4403
    },
    {
      "epoch": 1.2888498683055312,
      "grad_norm": 9.943839540937915e-05,
      "learning_rate": 1.777875329236172e-05,
      "loss": 0.0,
      "step": 4404
    },
    {
      "epoch": 1.289142522680714,
      "grad_norm": 7.087265968322754,
      "learning_rate": 1.777143693298215e-05,
      "loss": 0.2037,
      "step": 4405
    },
    {
      "epoch": 1.289435177055897,
      "grad_norm": 0.00020516323274932802,
      "learning_rate": 1.7764120573602577e-05,
      "loss": 0.0,
      "step": 4406
    },
    {
      "epoch": 1.2897278314310798,
      "grad_norm": 0.0007501508225686848,
      "learning_rate": 1.7756804214223e-05,
      "loss": 0.0,
      "step": 4407
    },
    {
      "epoch": 1.290020485806263,
      "grad_norm": 0.0001918547204695642,
      "learning_rate": 1.774948785484343e-05,
      "loss": 0.0,
      "step": 4408
    },
    {
      "epoch": 1.2903131401814458,
      "grad_norm": 0.0006179132033139467,
      "learning_rate": 1.7742171495463857e-05,
      "loss": 0.0,
      "step": 4409
    },
    {
      "epoch": 1.2906057945566287,
      "grad_norm": 0.00027921059518121183,
      "learning_rate": 1.7734855136084285e-05,
      "loss": 0.0,
      "step": 4410
    },
    {
      "epoch": 1.2908984489318116,
      "grad_norm": 6.703787221340463e-05,
      "learning_rate": 1.7727538776704713e-05,
      "loss": 0.0,
      "step": 4411
    },
    {
      "epoch": 1.2911911033069945,
      "grad_norm": 0.00042909561307169497,
      "learning_rate": 1.7720222417325137e-05,
      "loss": 0.0,
      "step": 4412
    },
    {
      "epoch": 1.2914837576821774,
      "grad_norm": 0.0004101863887626678,
      "learning_rate": 1.7712906057945565e-05,
      "loss": 0.0,
      "step": 4413
    },
    {
      "epoch": 1.2917764120573603,
      "grad_norm": 0.0006459795404225588,
      "learning_rate": 1.7705589698565993e-05,
      "loss": 0.0,
      "step": 4414
    },
    {
      "epoch": 1.2920690664325432,
      "grad_norm": 0.0002578320854809135,
      "learning_rate": 1.769827333918642e-05,
      "loss": 0.0,
      "step": 4415
    },
    {
      "epoch": 1.292361720807726,
      "grad_norm": 0.0011339564807713032,
      "learning_rate": 1.769095697980685e-05,
      "loss": 0.0,
      "step": 4416
    },
    {
      "epoch": 1.292654375182909,
      "grad_norm": 0.015644166618585587,
      "learning_rate": 1.7683640620427277e-05,
      "loss": 0.0001,
      "step": 4417
    },
    {
      "epoch": 1.2929470295580918,
      "grad_norm": 0.0011464759008958936,
      "learning_rate": 1.7676324261047702e-05,
      "loss": 0.0,
      "step": 4418
    },
    {
      "epoch": 1.2932396839332747,
      "grad_norm": 0.0006816753302700818,
      "learning_rate": 1.766900790166813e-05,
      "loss": 0.0,
      "step": 4419
    },
    {
      "epoch": 1.2935323383084576,
      "grad_norm": 11.788400650024414,
      "learning_rate": 1.7661691542288558e-05,
      "loss": 0.0448,
      "step": 4420
    },
    {
      "epoch": 1.2938249926836405,
      "grad_norm": 0.014803797006607056,
      "learning_rate": 1.7654375182908986e-05,
      "loss": 0.0001,
      "step": 4421
    },
    {
      "epoch": 1.2941176470588236,
      "grad_norm": 0.00040439984877593815,
      "learning_rate": 1.7647058823529414e-05,
      "loss": 0.0,
      "step": 4422
    },
    {
      "epoch": 1.2944103014340065,
      "grad_norm": 9.866422653198242,
      "learning_rate": 1.7639742464149838e-05,
      "loss": 0.0453,
      "step": 4423
    },
    {
      "epoch": 1.2947029558091894,
      "grad_norm": 0.04085865989327431,
      "learning_rate": 1.7632426104770266e-05,
      "loss": 0.0002,
      "step": 4424
    },
    {
      "epoch": 1.2949956101843723,
      "grad_norm": 0.006987076718360186,
      "learning_rate": 1.7625109745390694e-05,
      "loss": 0.0001,
      "step": 4425
    },
    {
      "epoch": 1.2952882645595551,
      "grad_norm": 0.01172039844095707,
      "learning_rate": 1.7617793386011122e-05,
      "loss": 0.0001,
      "step": 4426
    },
    {
      "epoch": 1.295580918934738,
      "grad_norm": 0.0010197163792327046,
      "learning_rate": 1.761047702663155e-05,
      "loss": 0.0,
      "step": 4427
    },
    {
      "epoch": 1.295873573309921,
      "grad_norm": 0.0006449169595725834,
      "learning_rate": 1.7603160667251975e-05,
      "loss": 0.0,
      "step": 4428
    },
    {
      "epoch": 1.2961662276851038,
      "grad_norm": 0.011789483949542046,
      "learning_rate": 1.7595844307872402e-05,
      "loss": 0.0001,
      "step": 4429
    },
    {
      "epoch": 1.296458882060287,
      "grad_norm": 0.001827823929488659,
      "learning_rate": 1.758852794849283e-05,
      "loss": 0.0,
      "step": 4430
    },
    {
      "epoch": 1.2967515364354698,
      "grad_norm": 0.00024653668515384197,
      "learning_rate": 1.758121158911326e-05,
      "loss": 0.0,
      "step": 4431
    },
    {
      "epoch": 1.2970441908106527,
      "grad_norm": 0.03542637452483177,
      "learning_rate": 1.7573895229733686e-05,
      "loss": 0.0002,
      "step": 4432
    },
    {
      "epoch": 1.2973368451858356,
      "grad_norm": 0.04266812279820442,
      "learning_rate": 1.7566578870354114e-05,
      "loss": 0.0002,
      "step": 4433
    },
    {
      "epoch": 1.2976294995610185,
      "grad_norm": 0.007048277650028467,
      "learning_rate": 1.755926251097454e-05,
      "loss": 0.0001,
      "step": 4434
    },
    {
      "epoch": 1.2979221539362014,
      "grad_norm": 0.007927828468382359,
      "learning_rate": 1.7551946151594967e-05,
      "loss": 0.0001,
      "step": 4435
    },
    {
      "epoch": 1.2982148083113842,
      "grad_norm": 0.004960792139172554,
      "learning_rate": 1.7544629792215395e-05,
      "loss": 0.0,
      "step": 4436
    },
    {
      "epoch": 1.2985074626865671,
      "grad_norm": 0.02081185020506382,
      "learning_rate": 1.7537313432835823e-05,
      "loss": 0.0002,
      "step": 4437
    },
    {
      "epoch": 1.29880011706175,
      "grad_norm": 0.06372889876365662,
      "learning_rate": 1.752999707345625e-05,
      "loss": 0.0003,
      "step": 4438
    },
    {
      "epoch": 1.299092771436933,
      "grad_norm": 0.0006737270741723478,
      "learning_rate": 1.7522680714076675e-05,
      "loss": 0.0,
      "step": 4439
    },
    {
      "epoch": 1.2993854258121158,
      "grad_norm": 0.012304473668336868,
      "learning_rate": 1.7515364354697103e-05,
      "loss": 0.0001,
      "step": 4440
    },
    {
      "epoch": 1.2996780801872987,
      "grad_norm": 0.06632810086011887,
      "learning_rate": 1.750804799531753e-05,
      "loss": 0.0003,
      "step": 4441
    },
    {
      "epoch": 1.2999707345624816,
      "grad_norm": 0.0013256085803732276,
      "learning_rate": 1.750073163593796e-05,
      "loss": 0.0,
      "step": 4442
    },
    {
      "epoch": 1.3002633889376647,
      "grad_norm": 0.006500875577330589,
      "learning_rate": 1.7493415276558387e-05,
      "loss": 0.0001,
      "step": 4443
    },
    {
      "epoch": 1.3005560433128476,
      "grad_norm": 0.00017142921569757164,
      "learning_rate": 1.748609891717881e-05,
      "loss": 0.0,
      "step": 4444
    },
    {
      "epoch": 1.3008486976880305,
      "grad_norm": 0.00018845274462364614,
      "learning_rate": 1.747878255779924e-05,
      "loss": 0.0,
      "step": 4445
    },
    {
      "epoch": 1.3011413520632134,
      "grad_norm": 0.00017048002337105572,
      "learning_rate": 1.7471466198419667e-05,
      "loss": 0.0,
      "step": 4446
    },
    {
      "epoch": 1.3014340064383962,
      "grad_norm": 0.00018671387806534767,
      "learning_rate": 1.7464149839040095e-05,
      "loss": 0.0,
      "step": 4447
    },
    {
      "epoch": 1.3017266608135791,
      "grad_norm": 0.001127854106016457,
      "learning_rate": 1.7456833479660523e-05,
      "loss": 0.0,
      "step": 4448
    },
    {
      "epoch": 1.302019315188762,
      "grad_norm": 0.5655648112297058,
      "learning_rate": 1.7449517120280948e-05,
      "loss": 0.003,
      "step": 4449
    },
    {
      "epoch": 1.302311969563945,
      "grad_norm": 0.0004359325102996081,
      "learning_rate": 1.7442200760901376e-05,
      "loss": 0.0,
      "step": 4450
    },
    {
      "epoch": 1.302604623939128,
      "grad_norm": 5.662459373474121,
      "learning_rate": 1.7434884401521804e-05,
      "loss": 0.0189,
      "step": 4451
    },
    {
      "epoch": 1.302897278314311,
      "grad_norm": 0.029793256893754005,
      "learning_rate": 1.7427568042142232e-05,
      "loss": 0.0002,
      "step": 4452
    },
    {
      "epoch": 1.3031899326894938,
      "grad_norm": 0.0007705074967816472,
      "learning_rate": 1.742025168276266e-05,
      "loss": 0.0,
      "step": 4453
    },
    {
      "epoch": 1.3034825870646767,
      "grad_norm": 0.000265125825535506,
      "learning_rate": 1.7412935323383088e-05,
      "loss": 0.0,
      "step": 4454
    },
    {
      "epoch": 1.3037752414398596,
      "grad_norm": 0.1741778403520584,
      "learning_rate": 1.7405618964003512e-05,
      "loss": 0.0006,
      "step": 4455
    },
    {
      "epoch": 1.3040678958150425,
      "grad_norm": 0.006701210513710976,
      "learning_rate": 1.739830260462394e-05,
      "loss": 0.0001,
      "step": 4456
    },
    {
      "epoch": 1.3043605501902253,
      "grad_norm": 0.07441215962171555,
      "learning_rate": 1.7390986245244368e-05,
      "loss": 0.0003,
      "step": 4457
    },
    {
      "epoch": 1.3046532045654082,
      "grad_norm": 0.0002838452346622944,
      "learning_rate": 1.7383669885864796e-05,
      "loss": 0.0,
      "step": 4458
    },
    {
      "epoch": 1.3049458589405911,
      "grad_norm": 0.000586601672694087,
      "learning_rate": 1.7376353526485224e-05,
      "loss": 0.0,
      "step": 4459
    },
    {
      "epoch": 1.305238513315774,
      "grad_norm": 0.0004960218211635947,
      "learning_rate": 1.736903716710565e-05,
      "loss": 0.0,
      "step": 4460
    },
    {
      "epoch": 1.305531167690957,
      "grad_norm": 0.0006264621042646468,
      "learning_rate": 1.7361720807726077e-05,
      "loss": 0.0,
      "step": 4461
    },
    {
      "epoch": 1.3058238220661398,
      "grad_norm": 0.0017462418181821704,
      "learning_rate": 1.7354404448346505e-05,
      "loss": 0.0,
      "step": 4462
    },
    {
      "epoch": 1.3061164764413227,
      "grad_norm": 0.0009365348960272968,
      "learning_rate": 1.7347088088966932e-05,
      "loss": 0.0,
      "step": 4463
    },
    {
      "epoch": 1.3064091308165058,
      "grad_norm": 0.0006757163209840655,
      "learning_rate": 1.733977172958736e-05,
      "loss": 0.0,
      "step": 4464
    },
    {
      "epoch": 1.3067017851916887,
      "grad_norm": 0.0011337717296555638,
      "learning_rate": 1.7332455370207785e-05,
      "loss": 0.0,
      "step": 4465
    },
    {
      "epoch": 1.3069944395668716,
      "grad_norm": 0.0008460487588308752,
      "learning_rate": 1.7325139010828213e-05,
      "loss": 0.0,
      "step": 4466
    },
    {
      "epoch": 1.3072870939420544,
      "grad_norm": 0.0015477667329832911,
      "learning_rate": 1.731782265144864e-05,
      "loss": 0.0,
      "step": 4467
    },
    {
      "epoch": 1.3075797483172373,
      "grad_norm": 0.004709722939878702,
      "learning_rate": 1.731050629206907e-05,
      "loss": 0.0001,
      "step": 4468
    },
    {
      "epoch": 1.3078724026924202,
      "grad_norm": 0.0004866288509219885,
      "learning_rate": 1.7303189932689497e-05,
      "loss": 0.0,
      "step": 4469
    },
    {
      "epoch": 1.308165057067603,
      "grad_norm": 0.003755094949156046,
      "learning_rate": 1.7295873573309925e-05,
      "loss": 0.0,
      "step": 4470
    },
    {
      "epoch": 1.308457711442786,
      "grad_norm": 0.1101662740111351,
      "learning_rate": 1.728855721393035e-05,
      "loss": 0.0003,
      "step": 4471
    },
    {
      "epoch": 1.308750365817969,
      "grad_norm": 0.0001950179139385,
      "learning_rate": 1.7281240854550777e-05,
      "loss": 0.0,
      "step": 4472
    },
    {
      "epoch": 1.309043020193152,
      "grad_norm": 0.005900123622268438,
      "learning_rate": 1.7273924495171205e-05,
      "loss": 0.0001,
      "step": 4473
    },
    {
      "epoch": 1.3093356745683349,
      "grad_norm": 0.005997837986797094,
      "learning_rate": 1.7266608135791633e-05,
      "loss": 0.0001,
      "step": 4474
    },
    {
      "epoch": 1.3096283289435178,
      "grad_norm": 0.00015216268366202712,
      "learning_rate": 1.7259291776412058e-05,
      "loss": 0.0,
      "step": 4475
    },
    {
      "epoch": 1.3099209833187007,
      "grad_norm": 0.6806633472442627,
      "learning_rate": 1.7251975417032486e-05,
      "loss": 0.0016,
      "step": 4476
    },
    {
      "epoch": 1.3102136376938835,
      "grad_norm": 0.007385977543890476,
      "learning_rate": 1.7244659057652914e-05,
      "loss": 0.0001,
      "step": 4477
    },
    {
      "epoch": 1.3105062920690664,
      "grad_norm": 0.001072707585990429,
      "learning_rate": 1.723734269827334e-05,
      "loss": 0.0,
      "step": 4478
    },
    {
      "epoch": 1.3107989464442493,
      "grad_norm": 0.0015393621288239956,
      "learning_rate": 1.7230026338893766e-05,
      "loss": 0.0,
      "step": 4479
    },
    {
      "epoch": 1.3110916008194322,
      "grad_norm": 0.0010753574315458536,
      "learning_rate": 1.7222709979514194e-05,
      "loss": 0.0,
      "step": 4480
    },
    {
      "epoch": 1.311384255194615,
      "grad_norm": 0.00011825386172858998,
      "learning_rate": 1.7215393620134622e-05,
      "loss": 0.0,
      "step": 4481
    },
    {
      "epoch": 1.311676909569798,
      "grad_norm": 0.0003599435440264642,
      "learning_rate": 1.720807726075505e-05,
      "loss": 0.0,
      "step": 4482
    },
    {
      "epoch": 1.3119695639449809,
      "grad_norm": 0.0006308953743427992,
      "learning_rate": 1.7200760901375475e-05,
      "loss": 0.0,
      "step": 4483
    },
    {
      "epoch": 1.3122622183201638,
      "grad_norm": 0.002207934157922864,
      "learning_rate": 1.7193444541995902e-05,
      "loss": 0.0,
      "step": 4484
    },
    {
      "epoch": 1.3125548726953469,
      "grad_norm": 0.00015759652887936682,
      "learning_rate": 1.718612818261633e-05,
      "loss": 0.0,
      "step": 4485
    },
    {
      "epoch": 1.3128475270705298,
      "grad_norm": 0.0001810126268537715,
      "learning_rate": 1.717881182323676e-05,
      "loss": 0.0,
      "step": 4486
    },
    {
      "epoch": 1.3131401814457127,
      "grad_norm": 9.173175811767578,
      "learning_rate": 1.7171495463857183e-05,
      "loss": 0.1658,
      "step": 4487
    },
    {
      "epoch": 1.3134328358208955,
      "grad_norm": 0.00019536117906682193,
      "learning_rate": 1.716417910447761e-05,
      "loss": 0.0,
      "step": 4488
    },
    {
      "epoch": 1.3137254901960784,
      "grad_norm": 0.00016868358943611383,
      "learning_rate": 1.715686274509804e-05,
      "loss": 0.0,
      "step": 4489
    },
    {
      "epoch": 1.3140181445712613,
      "grad_norm": 0.0006458146963268518,
      "learning_rate": 1.7149546385718467e-05,
      "loss": 0.0,
      "step": 4490
    },
    {
      "epoch": 1.3143107989464442,
      "grad_norm": 0.00034699728712439537,
      "learning_rate": 1.7142230026338895e-05,
      "loss": 0.0,
      "step": 4491
    },
    {
      "epoch": 1.314603453321627,
      "grad_norm": 0.011271150782704353,
      "learning_rate": 1.713491366695932e-05,
      "loss": 0.0001,
      "step": 4492
    },
    {
      "epoch": 1.3148961076968102,
      "grad_norm": 0.0006695784977637231,
      "learning_rate": 1.7127597307579747e-05,
      "loss": 0.0,
      "step": 4493
    },
    {
      "epoch": 1.315188762071993,
      "grad_norm": 0.001160523621365428,
      "learning_rate": 1.7120280948200175e-05,
      "loss": 0.0,
      "step": 4494
    },
    {
      "epoch": 1.315481416447176,
      "grad_norm": 0.0007655543158762157,
      "learning_rate": 1.7112964588820603e-05,
      "loss": 0.0,
      "step": 4495
    },
    {
      "epoch": 1.3157740708223589,
      "grad_norm": 11.203448295593262,
      "learning_rate": 1.710564822944103e-05,
      "loss": 0.037,
      "step": 4496
    },
    {
      "epoch": 1.3160667251975418,
      "grad_norm": 0.00023826721007935703,
      "learning_rate": 1.7098331870061456e-05,
      "loss": 0.0,
      "step": 4497
    },
    {
      "epoch": 1.3163593795727246,
      "grad_norm": 0.0005640920717269182,
      "learning_rate": 1.7091015510681884e-05,
      "loss": 0.0,
      "step": 4498
    },
    {
      "epoch": 1.3166520339479075,
      "grad_norm": 0.00016525565297342837,
      "learning_rate": 1.708369915130231e-05,
      "loss": 0.0,
      "step": 4499
    },
    {
      "epoch": 1.3169446883230904,
      "grad_norm": 0.0005659122252836823,
      "learning_rate": 1.707638279192274e-05,
      "loss": 0.0,
      "step": 4500
    },
    {
      "epoch": 1.3172373426982733,
      "grad_norm": 0.00027133754338137805,
      "learning_rate": 1.7069066432543167e-05,
      "loss": 0.0,
      "step": 4501
    },
    {
      "epoch": 1.3175299970734562,
      "grad_norm": 0.0008626925409771502,
      "learning_rate": 1.7061750073163595e-05,
      "loss": 0.0,
      "step": 4502
    },
    {
      "epoch": 1.317822651448639,
      "grad_norm": 0.0005522826686501503,
      "learning_rate": 1.705443371378402e-05,
      "loss": 0.0,
      "step": 4503
    },
    {
      "epoch": 1.318115305823822,
      "grad_norm": 0.00013968671555630863,
      "learning_rate": 1.7047117354404448e-05,
      "loss": 0.0,
      "step": 4504
    },
    {
      "epoch": 1.3184079601990049,
      "grad_norm": 0.00545914564281702,
      "learning_rate": 1.7039800995024876e-05,
      "loss": 0.0,
      "step": 4505
    },
    {
      "epoch": 1.3187006145741877,
      "grad_norm": 0.0023491347674280405,
      "learning_rate": 1.7032484635645304e-05,
      "loss": 0.0,
      "step": 4506
    },
    {
      "epoch": 1.3189932689493709,
      "grad_norm": 0.00017365990788675845,
      "learning_rate": 1.7025168276265732e-05,
      "loss": 0.0,
      "step": 4507
    },
    {
      "epoch": 1.3192859233245537,
      "grad_norm": 0.0003467008355073631,
      "learning_rate": 1.7017851916886156e-05,
      "loss": 0.0,
      "step": 4508
    },
    {
      "epoch": 1.3195785776997366,
      "grad_norm": 0.0010194872738793492,
      "learning_rate": 1.7010535557506584e-05,
      "loss": 0.0,
      "step": 4509
    },
    {
      "epoch": 1.3198712320749195,
      "grad_norm": 0.00044863088987767696,
      "learning_rate": 1.7003219198127012e-05,
      "loss": 0.0,
      "step": 4510
    },
    {
      "epoch": 1.3201638864501024,
      "grad_norm": 0.0003877152921631932,
      "learning_rate": 1.699590283874744e-05,
      "loss": 0.0,
      "step": 4511
    },
    {
      "epoch": 1.3204565408252853,
      "grad_norm": 0.0001742753229336813,
      "learning_rate": 1.6988586479367868e-05,
      "loss": 0.0,
      "step": 4512
    },
    {
      "epoch": 1.3207491952004682,
      "grad_norm": 0.00042221200419589877,
      "learning_rate": 1.6981270119988293e-05,
      "loss": 0.0,
      "step": 4513
    },
    {
      "epoch": 1.3210418495756513,
      "grad_norm": 0.0014971998753026128,
      "learning_rate": 1.697395376060872e-05,
      "loss": 0.0,
      "step": 4514
    },
    {
      "epoch": 1.3213345039508342,
      "grad_norm": 0.0009239388746209443,
      "learning_rate": 1.696663740122915e-05,
      "loss": 0.0,
      "step": 4515
    },
    {
      "epoch": 1.321627158326017,
      "grad_norm": 0.0002854466438293457,
      "learning_rate": 1.6959321041849577e-05,
      "loss": 0.0,
      "step": 4516
    },
    {
      "epoch": 1.3219198127012,
      "grad_norm": 0.00314055266790092,
      "learning_rate": 1.6952004682470005e-05,
      "loss": 0.0,
      "step": 4517
    },
    {
      "epoch": 1.3222124670763828,
      "grad_norm": 0.00027303065871819854,
      "learning_rate": 1.694468832309043e-05,
      "loss": 0.0,
      "step": 4518
    },
    {
      "epoch": 1.3225051214515657,
      "grad_norm": 0.0009044178295880556,
      "learning_rate": 1.6937371963710857e-05,
      "loss": 0.0,
      "step": 4519
    },
    {
      "epoch": 1.3227977758267486,
      "grad_norm": 0.0003398100088816136,
      "learning_rate": 1.6930055604331285e-05,
      "loss": 0.0,
      "step": 4520
    },
    {
      "epoch": 1.3230904302019315,
      "grad_norm": 0.0007959523354656994,
      "learning_rate": 1.6922739244951713e-05,
      "loss": 0.0,
      "step": 4521
    },
    {
      "epoch": 1.3233830845771144,
      "grad_norm": 0.00034519078326411545,
      "learning_rate": 1.691542288557214e-05,
      "loss": 0.0,
      "step": 4522
    },
    {
      "epoch": 1.3236757389522973,
      "grad_norm": 0.0001793931587599218,
      "learning_rate": 1.690810652619257e-05,
      "loss": 0.0,
      "step": 4523
    },
    {
      "epoch": 1.3239683933274802,
      "grad_norm": 0.00026595251983962953,
      "learning_rate": 1.6900790166812993e-05,
      "loss": 0.0,
      "step": 4524
    },
    {
      "epoch": 1.324261047702663,
      "grad_norm": 0.00012598829925991595,
      "learning_rate": 1.689347380743342e-05,
      "loss": 0.0,
      "step": 4525
    },
    {
      "epoch": 1.324553702077846,
      "grad_norm": 0.00086337880929932,
      "learning_rate": 1.688615744805385e-05,
      "loss": 0.0,
      "step": 4526
    },
    {
      "epoch": 1.3248463564530288,
      "grad_norm": 0.0017857812345027924,
      "learning_rate": 1.6878841088674277e-05,
      "loss": 0.0,
      "step": 4527
    },
    {
      "epoch": 1.325139010828212,
      "grad_norm": 0.000812956306617707,
      "learning_rate": 1.6871524729294705e-05,
      "loss": 0.0,
      "step": 4528
    },
    {
      "epoch": 1.3254316652033948,
      "grad_norm": 0.001117186970077455,
      "learning_rate": 1.686420836991513e-05,
      "loss": 0.0,
      "step": 4529
    },
    {
      "epoch": 1.3257243195785777,
      "grad_norm": 0.0001848274696385488,
      "learning_rate": 1.6856892010535558e-05,
      "loss": 0.0,
      "step": 4530
    },
    {
      "epoch": 1.3260169739537606,
      "grad_norm": 0.0008892636396922171,
      "learning_rate": 1.6849575651155986e-05,
      "loss": 0.0,
      "step": 4531
    },
    {
      "epoch": 1.3263096283289435,
      "grad_norm": 0.0011231438256800175,
      "learning_rate": 1.6842259291776414e-05,
      "loss": 0.0,
      "step": 4532
    },
    {
      "epoch": 1.3266022827041264,
      "grad_norm": 0.0030756371561437845,
      "learning_rate": 1.683494293239684e-05,
      "loss": 0.0,
      "step": 4533
    },
    {
      "epoch": 1.3268949370793093,
      "grad_norm": 0.00010932189616141841,
      "learning_rate": 1.6827626573017266e-05,
      "loss": 0.0,
      "step": 4534
    },
    {
      "epoch": 1.3271875914544922,
      "grad_norm": 0.0002901269472204149,
      "learning_rate": 1.6820310213637694e-05,
      "loss": 0.0,
      "step": 4535
    },
    {
      "epoch": 1.3274802458296753,
      "grad_norm": 0.0020996062085032463,
      "learning_rate": 1.6812993854258122e-05,
      "loss": 0.0,
      "step": 4536
    },
    {
      "epoch": 1.3277729002048582,
      "grad_norm": 0.0007693211082369089,
      "learning_rate": 1.680567749487855e-05,
      "loss": 0.0,
      "step": 4537
    },
    {
      "epoch": 1.328065554580041,
      "grad_norm": 0.00048650274402461946,
      "learning_rate": 1.6798361135498978e-05,
      "loss": 0.0,
      "step": 4538
    },
    {
      "epoch": 1.328358208955224,
      "grad_norm": 0.0013929984997957945,
      "learning_rate": 1.6791044776119406e-05,
      "loss": 0.0,
      "step": 4539
    },
    {
      "epoch": 1.3286508633304068,
      "grad_norm": 0.00036329045542515814,
      "learning_rate": 1.678372841673983e-05,
      "loss": 0.0,
      "step": 4540
    },
    {
      "epoch": 1.3289435177055897,
      "grad_norm": 0.0011186026968061924,
      "learning_rate": 1.677641205736026e-05,
      "loss": 0.0,
      "step": 4541
    },
    {
      "epoch": 1.3292361720807726,
      "grad_norm": 0.0006345548899844289,
      "learning_rate": 1.6769095697980686e-05,
      "loss": 0.0,
      "step": 4542
    },
    {
      "epoch": 1.3295288264559555,
      "grad_norm": 0.0022853300906717777,
      "learning_rate": 1.6761779338601114e-05,
      "loss": 0.0,
      "step": 4543
    },
    {
      "epoch": 1.3298214808311384,
      "grad_norm": 0.0003682018432300538,
      "learning_rate": 1.6754462979221542e-05,
      "loss": 0.0,
      "step": 4544
    },
    {
      "epoch": 1.3301141352063213,
      "grad_norm": 0.001419796491973102,
      "learning_rate": 1.6747146619841967e-05,
      "loss": 0.0,
      "step": 4545
    },
    {
      "epoch": 1.3304067895815042,
      "grad_norm": 0.0007176802610047162,
      "learning_rate": 1.6739830260462395e-05,
      "loss": 0.0,
      "step": 4546
    },
    {
      "epoch": 1.330699443956687,
      "grad_norm": 0.007843074388802052,
      "learning_rate": 1.6732513901082823e-05,
      "loss": 0.0001,
      "step": 4547
    },
    {
      "epoch": 1.33099209833187,
      "grad_norm": 0.00015430124767590314,
      "learning_rate": 1.672519754170325e-05,
      "loss": 0.0,
      "step": 4548
    },
    {
      "epoch": 1.331284752707053,
      "grad_norm": 0.0009809049079194665,
      "learning_rate": 1.671788118232368e-05,
      "loss": 0.0,
      "step": 4549
    },
    {
      "epoch": 1.331577407082236,
      "grad_norm": 0.0005149840144440532,
      "learning_rate": 1.6710564822944103e-05,
      "loss": 0.0,
      "step": 4550
    },
    {
      "epoch": 1.3318700614574188,
      "grad_norm": 0.0008740427438169718,
      "learning_rate": 1.670324846356453e-05,
      "loss": 0.0,
      "step": 4551
    },
    {
      "epoch": 1.3321627158326017,
      "grad_norm": 0.00028420190210454166,
      "learning_rate": 1.669593210418496e-05,
      "loss": 0.0,
      "step": 4552
    },
    {
      "epoch": 1.3324553702077846,
      "grad_norm": 0.000594905752222985,
      "learning_rate": 1.6688615744805387e-05,
      "loss": 0.0,
      "step": 4553
    },
    {
      "epoch": 1.3327480245829675,
      "grad_norm": 0.025138122960925102,
      "learning_rate": 1.6681299385425815e-05,
      "loss": 0.0001,
      "step": 4554
    },
    {
      "epoch": 1.3330406789581504,
      "grad_norm": 0.0018326579593122005,
      "learning_rate": 1.6673983026046243e-05,
      "loss": 0.0,
      "step": 4555
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 0.0002547812182456255,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.0,
      "step": 4556
    },
    {
      "epoch": 1.3336259877085164,
      "grad_norm": 0.00041645910823717713,
      "learning_rate": 1.6659350307287095e-05,
      "loss": 0.0,
      "step": 4557
    },
    {
      "epoch": 1.3339186420836993,
      "grad_norm": 0.0005207156063988805,
      "learning_rate": 1.6652033947907523e-05,
      "loss": 0.0,
      "step": 4558
    },
    {
      "epoch": 1.3342112964588821,
      "grad_norm": 0.01128105353564024,
      "learning_rate": 1.664471758852795e-05,
      "loss": 0.0001,
      "step": 4559
    },
    {
      "epoch": 1.334503950834065,
      "grad_norm": 0.0005704149371013045,
      "learning_rate": 1.663740122914838e-05,
      "loss": 0.0,
      "step": 4560
    },
    {
      "epoch": 1.334796605209248,
      "grad_norm": 0.004358152858912945,
      "learning_rate": 1.6630084869768804e-05,
      "loss": 0.0,
      "step": 4561
    },
    {
      "epoch": 1.3350892595844308,
      "grad_norm": 0.0003605438978411257,
      "learning_rate": 1.6622768510389232e-05,
      "loss": 0.0,
      "step": 4562
    },
    {
      "epoch": 1.3353819139596137,
      "grad_norm": 0.00022131146397441626,
      "learning_rate": 1.661545215100966e-05,
      "loss": 0.0,
      "step": 4563
    },
    {
      "epoch": 1.3356745683347966,
      "grad_norm": 0.00019689615874085575,
      "learning_rate": 1.6608135791630088e-05,
      "loss": 0.0,
      "step": 4564
    },
    {
      "epoch": 1.3359672227099795,
      "grad_norm": 0.00034774577943608165,
      "learning_rate": 1.6600819432250516e-05,
      "loss": 0.0,
      "step": 4565
    },
    {
      "epoch": 1.3362598770851624,
      "grad_norm": 0.0001815056020859629,
      "learning_rate": 1.659350307287094e-05,
      "loss": 0.0,
      "step": 4566
    },
    {
      "epoch": 1.3365525314603453,
      "grad_norm": 0.00022513096337206662,
      "learning_rate": 1.6586186713491368e-05,
      "loss": 0.0,
      "step": 4567
    },
    {
      "epoch": 1.3368451858355281,
      "grad_norm": 0.005451194941997528,
      "learning_rate": 1.6578870354111796e-05,
      "loss": 0.0,
      "step": 4568
    },
    {
      "epoch": 1.337137840210711,
      "grad_norm": 2.7392542362213135,
      "learning_rate": 1.6571553994732224e-05,
      "loss": 0.2519,
      "step": 4569
    },
    {
      "epoch": 1.3374304945858941,
      "grad_norm": 0.0004196388181298971,
      "learning_rate": 1.656423763535265e-05,
      "loss": 0.0,
      "step": 4570
    },
    {
      "epoch": 1.337723148961077,
      "grad_norm": 0.0005751379649154842,
      "learning_rate": 1.6556921275973077e-05,
      "loss": 0.0,
      "step": 4571
    },
    {
      "epoch": 1.33801580333626,
      "grad_norm": 0.0018512074602767825,
      "learning_rate": 1.6549604916593505e-05,
      "loss": 0.0,
      "step": 4572
    },
    {
      "epoch": 1.3383084577114428,
      "grad_norm": 0.0012926749186590314,
      "learning_rate": 1.6542288557213932e-05,
      "loss": 0.0,
      "step": 4573
    },
    {
      "epoch": 1.3386011120866257,
      "grad_norm": 0.004534748382866383,
      "learning_rate": 1.6534972197834357e-05,
      "loss": 0.0001,
      "step": 4574
    },
    {
      "epoch": 1.3388937664618086,
      "grad_norm": 0.0014039897359907627,
      "learning_rate": 1.6527655838454785e-05,
      "loss": 0.0,
      "step": 4575
    },
    {
      "epoch": 1.3391864208369915,
      "grad_norm": 0.002373710973188281,
      "learning_rate": 1.6520339479075213e-05,
      "loss": 0.0,
      "step": 4576
    },
    {
      "epoch": 1.3394790752121744,
      "grad_norm": 3.6035537719726562,
      "learning_rate": 1.651302311969564e-05,
      "loss": 0.0175,
      "step": 4577
    },
    {
      "epoch": 1.3397717295873575,
      "grad_norm": 0.6810202598571777,
      "learning_rate": 1.6505706760316065e-05,
      "loss": 0.0031,
      "step": 4578
    },
    {
      "epoch": 1.3400643839625404,
      "grad_norm": 0.02525263838469982,
      "learning_rate": 1.6498390400936493e-05,
      "loss": 0.0003,
      "step": 4579
    },
    {
      "epoch": 1.3403570383377232,
      "grad_norm": 0.015978388488292694,
      "learning_rate": 1.649107404155692e-05,
      "loss": 0.0002,
      "step": 4580
    },
    {
      "epoch": 1.3406496927129061,
      "grad_norm": 0.018606310710310936,
      "learning_rate": 1.648375768217735e-05,
      "loss": 0.0002,
      "step": 4581
    },
    {
      "epoch": 1.340942347088089,
      "grad_norm": 0.0010876130545511842,
      "learning_rate": 1.6476441322797774e-05,
      "loss": 0.0,
      "step": 4582
    },
    {
      "epoch": 1.341235001463272,
      "grad_norm": 0.0028584327083081007,
      "learning_rate": 1.6469124963418202e-05,
      "loss": 0.0,
      "step": 4583
    },
    {
      "epoch": 1.3415276558384548,
      "grad_norm": 0.003823688719421625,
      "learning_rate": 1.646180860403863e-05,
      "loss": 0.0001,
      "step": 4584
    },
    {
      "epoch": 1.3418203102136377,
      "grad_norm": 0.0016548007261008024,
      "learning_rate": 1.6454492244659058e-05,
      "loss": 0.0,
      "step": 4585
    },
    {
      "epoch": 1.3421129645888206,
      "grad_norm": 0.0022606924176216125,
      "learning_rate": 1.6447175885279486e-05,
      "loss": 0.0,
      "step": 4586
    },
    {
      "epoch": 1.3424056189640035,
      "grad_norm": 0.5410664081573486,
      "learning_rate": 1.643985952589991e-05,
      "loss": 0.0023,
      "step": 4587
    },
    {
      "epoch": 1.3426982733391863,
      "grad_norm": 0.0010367278009653091,
      "learning_rate": 1.6432543166520338e-05,
      "loss": 0.0,
      "step": 4588
    },
    {
      "epoch": 1.3429909277143692,
      "grad_norm": 0.0008697782177478075,
      "learning_rate": 1.6425226807140766e-05,
      "loss": 0.0,
      "step": 4589
    },
    {
      "epoch": 1.3432835820895521,
      "grad_norm": 0.0026082817930728197,
      "learning_rate": 1.6417910447761194e-05,
      "loss": 0.0001,
      "step": 4590
    },
    {
      "epoch": 1.3435762364647352,
      "grad_norm": 0.007427962962538004,
      "learning_rate": 1.6410594088381622e-05,
      "loss": 0.0001,
      "step": 4591
    },
    {
      "epoch": 1.3438688908399181,
      "grad_norm": 0.0020755648147314787,
      "learning_rate": 1.640327772900205e-05,
      "loss": 0.0,
      "step": 4592
    },
    {
      "epoch": 1.344161545215101,
      "grad_norm": 3.129110813140869,
      "learning_rate": 1.6395961369622475e-05,
      "loss": 0.021,
      "step": 4593
    },
    {
      "epoch": 1.344454199590284,
      "grad_norm": 0.004509671591222286,
      "learning_rate": 1.6388645010242902e-05,
      "loss": 0.0001,
      "step": 4594
    },
    {
      "epoch": 1.3447468539654668,
      "grad_norm": 0.1679302603006363,
      "learning_rate": 1.638132865086333e-05,
      "loss": 0.0005,
      "step": 4595
    },
    {
      "epoch": 1.3450395083406497,
      "grad_norm": 0.008735943585634232,
      "learning_rate": 1.637401229148376e-05,
      "loss": 0.0001,
      "step": 4596
    },
    {
      "epoch": 1.3453321627158326,
      "grad_norm": 0.010910887271165848,
      "learning_rate": 1.6366695932104186e-05,
      "loss": 0.0001,
      "step": 4597
    },
    {
      "epoch": 1.3456248170910154,
      "grad_norm": 0.007489155046641827,
      "learning_rate": 1.635937957272461e-05,
      "loss": 0.0001,
      "step": 4598
    },
    {
      "epoch": 1.3459174714661986,
      "grad_norm": 0.01282794401049614,
      "learning_rate": 1.635206321334504e-05,
      "loss": 0.0002,
      "step": 4599
    },
    {
      "epoch": 1.3462101258413814,
      "grad_norm": 0.0005722651840187609,
      "learning_rate": 1.6344746853965467e-05,
      "loss": 0.0,
      "step": 4600
    },
    {
      "epoch": 1.3465027802165643,
      "grad_norm": 0.02161412313580513,
      "learning_rate": 1.6337430494585895e-05,
      "loss": 0.0003,
      "step": 4601
    },
    {
      "epoch": 1.3467954345917472,
      "grad_norm": 0.03608215972781181,
      "learning_rate": 1.6330114135206323e-05,
      "loss": 0.0005,
      "step": 4602
    },
    {
      "epoch": 1.34708808896693,
      "grad_norm": 5.9469399275258183e-05,
      "learning_rate": 1.6322797775826747e-05,
      "loss": 0.0,
      "step": 4603
    },
    {
      "epoch": 1.347380743342113,
      "grad_norm": 0.00882419478148222,
      "learning_rate": 1.6315481416447175e-05,
      "loss": 0.0001,
      "step": 4604
    },
    {
      "epoch": 1.3476733977172959,
      "grad_norm": 0.20794746279716492,
      "learning_rate": 1.6308165057067603e-05,
      "loss": 0.0016,
      "step": 4605
    },
    {
      "epoch": 1.3479660520924788,
      "grad_norm": 0.0021146859508007765,
      "learning_rate": 1.630084869768803e-05,
      "loss": 0.0,
      "step": 4606
    },
    {
      "epoch": 1.3482587064676617,
      "grad_norm": 0.2945975959300995,
      "learning_rate": 1.629353233830846e-05,
      "loss": 0.0027,
      "step": 4607
    },
    {
      "epoch": 1.3485513608428445,
      "grad_norm": 0.004604853689670563,
      "learning_rate": 1.6286215978928887e-05,
      "loss": 0.0001,
      "step": 4608
    },
    {
      "epoch": 1.3488440152180274,
      "grad_norm": 0.021960347890853882,
      "learning_rate": 1.627889961954931e-05,
      "loss": 0.0003,
      "step": 4609
    },
    {
      "epoch": 1.3491366695932103,
      "grad_norm": 0.003494089236482978,
      "learning_rate": 1.627158326016974e-05,
      "loss": 0.0001,
      "step": 4610
    },
    {
      "epoch": 1.3494293239683932,
      "grad_norm": 0.017365049570798874,
      "learning_rate": 1.6264266900790167e-05,
      "loss": 0.0002,
      "step": 4611
    },
    {
      "epoch": 1.349721978343576,
      "grad_norm": 0.0007535493350587785,
      "learning_rate": 1.6256950541410595e-05,
      "loss": 0.0,
      "step": 4612
    },
    {
      "epoch": 1.3500146327187592,
      "grad_norm": 0.00664918078109622,
      "learning_rate": 1.6249634182031023e-05,
      "loss": 0.0001,
      "step": 4613
    },
    {
      "epoch": 1.350307287093942,
      "grad_norm": 0.8967113494873047,
      "learning_rate": 1.6242317822651448e-05,
      "loss": 0.0034,
      "step": 4614
    },
    {
      "epoch": 1.350599941469125,
      "grad_norm": 0.01985401101410389,
      "learning_rate": 1.6235001463271876e-05,
      "loss": 0.0002,
      "step": 4615
    },
    {
      "epoch": 1.3508925958443079,
      "grad_norm": 0.0015297238714993,
      "learning_rate": 1.6227685103892304e-05,
      "loss": 0.0,
      "step": 4616
    },
    {
      "epoch": 1.3511852502194908,
      "grad_norm": 0.0006844381568953395,
      "learning_rate": 1.6220368744512732e-05,
      "loss": 0.0,
      "step": 4617
    },
    {
      "epoch": 1.3514779045946737,
      "grad_norm": 0.00511653209105134,
      "learning_rate": 1.621305238513316e-05,
      "loss": 0.0,
      "step": 4618
    },
    {
      "epoch": 1.3517705589698565,
      "grad_norm": 0.004954398609697819,
      "learning_rate": 1.6205736025753584e-05,
      "loss": 0.0,
      "step": 4619
    },
    {
      "epoch": 1.3520632133450394,
      "grad_norm": 0.00022442862973548472,
      "learning_rate": 1.6198419666374012e-05,
      "loss": 0.0,
      "step": 4620
    },
    {
      "epoch": 1.3523558677202225,
      "grad_norm": 0.00047761958558112383,
      "learning_rate": 1.619110330699444e-05,
      "loss": 0.0,
      "step": 4621
    },
    {
      "epoch": 1.3526485220954054,
      "grad_norm": 0.027839884161949158,
      "learning_rate": 1.6183786947614868e-05,
      "loss": 0.0001,
      "step": 4622
    },
    {
      "epoch": 1.3529411764705883,
      "grad_norm": 0.00015593717398587614,
      "learning_rate": 1.6176470588235296e-05,
      "loss": 0.0,
      "step": 4623
    },
    {
      "epoch": 1.3532338308457712,
      "grad_norm": 0.0009893402457237244,
      "learning_rate": 1.6169154228855724e-05,
      "loss": 0.0,
      "step": 4624
    },
    {
      "epoch": 1.353526485220954,
      "grad_norm": 0.0024944068863987923,
      "learning_rate": 1.616183786947615e-05,
      "loss": 0.0,
      "step": 4625
    },
    {
      "epoch": 1.353819139596137,
      "grad_norm": 0.00030253714066930115,
      "learning_rate": 1.6154521510096577e-05,
      "loss": 0.0,
      "step": 4626
    },
    {
      "epoch": 1.3541117939713199,
      "grad_norm": 0.0011133461957797408,
      "learning_rate": 1.6147205150717005e-05,
      "loss": 0.0,
      "step": 4627
    },
    {
      "epoch": 1.3544044483465028,
      "grad_norm": 0.0009970914106816053,
      "learning_rate": 1.6139888791337432e-05,
      "loss": 0.0,
      "step": 4628
    },
    {
      "epoch": 1.3546971027216856,
      "grad_norm": 0.0009626855025999248,
      "learning_rate": 1.613257243195786e-05,
      "loss": 0.0,
      "step": 4629
    },
    {
      "epoch": 1.3549897570968685,
      "grad_norm": 0.0006123323692008853,
      "learning_rate": 1.6125256072578285e-05,
      "loss": 0.0,
      "step": 4630
    },
    {
      "epoch": 1.3552824114720514,
      "grad_norm": 0.010217566974461079,
      "learning_rate": 1.6117939713198713e-05,
      "loss": 0.0,
      "step": 4631
    },
    {
      "epoch": 1.3555750658472343,
      "grad_norm": 0.004811934195458889,
      "learning_rate": 1.611062335381914e-05,
      "loss": 0.0,
      "step": 4632
    },
    {
      "epoch": 1.3558677202224172,
      "grad_norm": 0.002419382566586137,
      "learning_rate": 1.610330699443957e-05,
      "loss": 0.0,
      "step": 4633
    },
    {
      "epoch": 1.3561603745976003,
      "grad_norm": 0.04514714330434799,
      "learning_rate": 1.6095990635059997e-05,
      "loss": 0.0002,
      "step": 4634
    },
    {
      "epoch": 1.3564530289727832,
      "grad_norm": 0.00044169966713525355,
      "learning_rate": 1.608867427568042e-05,
      "loss": 0.0,
      "step": 4635
    },
    {
      "epoch": 1.356745683347966,
      "grad_norm": 0.001194591517560184,
      "learning_rate": 1.608135791630085e-05,
      "loss": 0.0,
      "step": 4636
    },
    {
      "epoch": 1.357038337723149,
      "grad_norm": 2.3717167377471924,
      "learning_rate": 1.6074041556921277e-05,
      "loss": 0.0027,
      "step": 4637
    },
    {
      "epoch": 1.3573309920983319,
      "grad_norm": 0.0013330380897969007,
      "learning_rate": 1.6066725197541705e-05,
      "loss": 0.0,
      "step": 4638
    },
    {
      "epoch": 1.3576236464735147,
      "grad_norm": 0.00037112022982910275,
      "learning_rate": 1.6059408838162133e-05,
      "loss": 0.0,
      "step": 4639
    },
    {
      "epoch": 1.3579163008486976,
      "grad_norm": 0.00014045827265363187,
      "learning_rate": 1.605209247878256e-05,
      "loss": 0.0,
      "step": 4640
    },
    {
      "epoch": 1.3582089552238805,
      "grad_norm": 2.3475875854492188,
      "learning_rate": 1.6044776119402986e-05,
      "loss": 0.1643,
      "step": 4641
    },
    {
      "epoch": 1.3585016095990636,
      "grad_norm": 0.0006882725283503532,
      "learning_rate": 1.6037459760023414e-05,
      "loss": 0.0,
      "step": 4642
    },
    {
      "epoch": 1.3587942639742465,
      "grad_norm": 0.0011248913360759616,
      "learning_rate": 1.603014340064384e-05,
      "loss": 0.0,
      "step": 4643
    },
    {
      "epoch": 1.3590869183494294,
      "grad_norm": 0.0014706410001963377,
      "learning_rate": 1.602282704126427e-05,
      "loss": 0.0,
      "step": 4644
    },
    {
      "epoch": 1.3593795727246123,
      "grad_norm": 0.016344530507922173,
      "learning_rate": 1.6015510681884697e-05,
      "loss": 0.0001,
      "step": 4645
    },
    {
      "epoch": 1.3596722270997952,
      "grad_norm": 0.0013378773583099246,
      "learning_rate": 1.6008194322505122e-05,
      "loss": 0.0,
      "step": 4646
    },
    {
      "epoch": 1.359964881474978,
      "grad_norm": 0.0006376465316861868,
      "learning_rate": 1.600087796312555e-05,
      "loss": 0.0,
      "step": 4647
    },
    {
      "epoch": 1.360257535850161,
      "grad_norm": 0.0018161243060603738,
      "learning_rate": 1.5993561603745978e-05,
      "loss": 0.0,
      "step": 4648
    },
    {
      "epoch": 1.3605501902253438,
      "grad_norm": 0.9918575286865234,
      "learning_rate": 1.5986245244366406e-05,
      "loss": 0.0012,
      "step": 4649
    },
    {
      "epoch": 1.3608428446005267,
      "grad_norm": 0.0007200206164270639,
      "learning_rate": 1.5978928884986834e-05,
      "loss": 0.0,
      "step": 4650
    },
    {
      "epoch": 1.3611354989757096,
      "grad_norm": 0.03315776214003563,
      "learning_rate": 1.597161252560726e-05,
      "loss": 0.0003,
      "step": 4651
    },
    {
      "epoch": 1.3614281533508925,
      "grad_norm": 0.013876295648515224,
      "learning_rate": 1.5964296166227686e-05,
      "loss": 0.0002,
      "step": 4652
    },
    {
      "epoch": 1.3617208077260754,
      "grad_norm": 0.02295016311109066,
      "learning_rate": 1.5956979806848114e-05,
      "loss": 0.0002,
      "step": 4653
    },
    {
      "epoch": 1.3620134621012583,
      "grad_norm": 4.472840309143066,
      "learning_rate": 1.5949663447468542e-05,
      "loss": 0.2316,
      "step": 4654
    },
    {
      "epoch": 1.3623061164764414,
      "grad_norm": 2.750107765197754,
      "learning_rate": 1.594234708808897e-05,
      "loss": 0.0357,
      "step": 4655
    },
    {
      "epoch": 1.3625987708516243,
      "grad_norm": 0.0004565751878544688,
      "learning_rate": 1.5935030728709395e-05,
      "loss": 0.0,
      "step": 4656
    },
    {
      "epoch": 1.3628914252268072,
      "grad_norm": 0.0007538439822383225,
      "learning_rate": 1.5927714369329823e-05,
      "loss": 0.0,
      "step": 4657
    },
    {
      "epoch": 1.36318407960199,
      "grad_norm": 0.0005997567204758525,
      "learning_rate": 1.592039800995025e-05,
      "loss": 0.0,
      "step": 4658
    },
    {
      "epoch": 1.363476733977173,
      "grad_norm": 0.0017027143621817231,
      "learning_rate": 1.591308165057068e-05,
      "loss": 0.0,
      "step": 4659
    },
    {
      "epoch": 1.3637693883523558,
      "grad_norm": 0.04539995267987251,
      "learning_rate": 1.5905765291191103e-05,
      "loss": 0.0005,
      "step": 4660
    },
    {
      "epoch": 1.3640620427275387,
      "grad_norm": 0.0005709021934308112,
      "learning_rate": 1.589844893181153e-05,
      "loss": 0.0,
      "step": 4661
    },
    {
      "epoch": 1.3643546971027216,
      "grad_norm": 0.0025214501656591892,
      "learning_rate": 1.589113257243196e-05,
      "loss": 0.0,
      "step": 4662
    },
    {
      "epoch": 1.3646473514779047,
      "grad_norm": 0.0008173706009984016,
      "learning_rate": 1.5883816213052387e-05,
      "loss": 0.0,
      "step": 4663
    },
    {
      "epoch": 1.3649400058530876,
      "grad_norm": 0.0025939266197383404,
      "learning_rate": 1.587649985367281e-05,
      "loss": 0.0001,
      "step": 4664
    },
    {
      "epoch": 1.3652326602282705,
      "grad_norm": 0.0337810143828392,
      "learning_rate": 1.586918349429324e-05,
      "loss": 0.0003,
      "step": 4665
    },
    {
      "epoch": 1.3655253146034534,
      "grad_norm": 0.0031624024268239737,
      "learning_rate": 1.5861867134913667e-05,
      "loss": 0.0001,
      "step": 4666
    },
    {
      "epoch": 1.3658179689786363,
      "grad_norm": 0.0024290585424751043,
      "learning_rate": 1.5854550775534095e-05,
      "loss": 0.0,
      "step": 4667
    },
    {
      "epoch": 1.3661106233538192,
      "grad_norm": 0.0005581422010436654,
      "learning_rate": 1.584723441615452e-05,
      "loss": 0.0,
      "step": 4668
    },
    {
      "epoch": 1.366403277729002,
      "grad_norm": 0.0010313043603673577,
      "learning_rate": 1.5839918056774948e-05,
      "loss": 0.0,
      "step": 4669
    },
    {
      "epoch": 1.366695932104185,
      "grad_norm": 0.059076376259326935,
      "learning_rate": 1.5832601697395376e-05,
      "loss": 0.0003,
      "step": 4670
    },
    {
      "epoch": 1.3669885864793678,
      "grad_norm": 0.004700097721070051,
      "learning_rate": 1.5825285338015804e-05,
      "loss": 0.0001,
      "step": 4671
    },
    {
      "epoch": 1.3672812408545507,
      "grad_norm": 0.004779118578881025,
      "learning_rate": 1.581796897863623e-05,
      "loss": 0.0001,
      "step": 4672
    },
    {
      "epoch": 1.3675738952297336,
      "grad_norm": 0.002353015821427107,
      "learning_rate": 1.5810652619256656e-05,
      "loss": 0.0,
      "step": 4673
    },
    {
      "epoch": 1.3678665496049165,
      "grad_norm": 5.330374717712402,
      "learning_rate": 1.5803336259877084e-05,
      "loss": 0.235,
      "step": 4674
    },
    {
      "epoch": 1.3681592039800994,
      "grad_norm": 4.413346767425537,
      "learning_rate": 1.5796019900497512e-05,
      "loss": 0.256,
      "step": 4675
    },
    {
      "epoch": 1.3684518583552825,
      "grad_norm": 0.0033523140009492636,
      "learning_rate": 1.578870354111794e-05,
      "loss": 0.0001,
      "step": 4676
    },
    {
      "epoch": 1.3687445127304654,
      "grad_norm": 0.0006485573248937726,
      "learning_rate": 1.5781387181738368e-05,
      "loss": 0.0,
      "step": 4677
    },
    {
      "epoch": 1.3690371671056483,
      "grad_norm": 0.003252198686823249,
      "learning_rate": 1.5774070822358793e-05,
      "loss": 0.0001,
      "step": 4678
    },
    {
      "epoch": 1.3693298214808312,
      "grad_norm": 0.0008407050045207143,
      "learning_rate": 1.576675446297922e-05,
      "loss": 0.0,
      "step": 4679
    },
    {
      "epoch": 1.369622475856014,
      "grad_norm": 0.001850403961725533,
      "learning_rate": 1.575943810359965e-05,
      "loss": 0.0,
      "step": 4680
    },
    {
      "epoch": 1.369915130231197,
      "grad_norm": 0.16275891661643982,
      "learning_rate": 1.5752121744220077e-05,
      "loss": 0.0009,
      "step": 4681
    },
    {
      "epoch": 1.3702077846063798,
      "grad_norm": 0.29024508595466614,
      "learning_rate": 1.5744805384840504e-05,
      "loss": 0.0015,
      "step": 4682
    },
    {
      "epoch": 1.3705004389815627,
      "grad_norm": 0.8833554983139038,
      "learning_rate": 1.573748902546093e-05,
      "loss": 0.0039,
      "step": 4683
    },
    {
      "epoch": 1.3707930933567458,
      "grad_norm": 0.003219273639842868,
      "learning_rate": 1.5730172666081357e-05,
      "loss": 0.0001,
      "step": 4684
    },
    {
      "epoch": 1.3710857477319287,
      "grad_norm": 0.005362699273973703,
      "learning_rate": 1.5722856306701785e-05,
      "loss": 0.0001,
      "step": 4685
    },
    {
      "epoch": 1.3713784021071116,
      "grad_norm": 0.008708633482456207,
      "learning_rate": 1.5715539947322213e-05,
      "loss": 0.0002,
      "step": 4686
    },
    {
      "epoch": 1.3716710564822945,
      "grad_norm": 0.018540022894740105,
      "learning_rate": 1.570822358794264e-05,
      "loss": 0.0002,
      "step": 4687
    },
    {
      "epoch": 1.3719637108574774,
      "grad_norm": 0.007677059154957533,
      "learning_rate": 1.5700907228563065e-05,
      "loss": 0.0001,
      "step": 4688
    },
    {
      "epoch": 1.3722563652326603,
      "grad_norm": 0.0029118761885911226,
      "learning_rate": 1.5693590869183493e-05,
      "loss": 0.0001,
      "step": 4689
    },
    {
      "epoch": 1.3725490196078431,
      "grad_norm": 0.005615610629320145,
      "learning_rate": 1.568627450980392e-05,
      "loss": 0.0001,
      "step": 4690
    },
    {
      "epoch": 1.372841673983026,
      "grad_norm": 0.00372922420501709,
      "learning_rate": 1.567895815042435e-05,
      "loss": 0.0001,
      "step": 4691
    },
    {
      "epoch": 1.373134328358209,
      "grad_norm": 0.0014681870816275477,
      "learning_rate": 1.5671641791044777e-05,
      "loss": 0.0,
      "step": 4692
    },
    {
      "epoch": 1.3734269827333918,
      "grad_norm": 0.0036305561661720276,
      "learning_rate": 1.5664325431665205e-05,
      "loss": 0.0001,
      "step": 4693
    },
    {
      "epoch": 1.3737196371085747,
      "grad_norm": 0.003226341214030981,
      "learning_rate": 1.565700907228563e-05,
      "loss": 0.0001,
      "step": 4694
    },
    {
      "epoch": 1.3740122914837576,
      "grad_norm": 0.0033313813619315624,
      "learning_rate": 1.5649692712906058e-05,
      "loss": 0.0001,
      "step": 4695
    },
    {
      "epoch": 1.3743049458589405,
      "grad_norm": 0.002889020601287484,
      "learning_rate": 1.5642376353526486e-05,
      "loss": 0.0001,
      "step": 4696
    },
    {
      "epoch": 1.3745976002341234,
      "grad_norm": 0.001413546153344214,
      "learning_rate": 1.5635059994146914e-05,
      "loss": 0.0,
      "step": 4697
    },
    {
      "epoch": 1.3748902546093065,
      "grad_norm": 0.02051861211657524,
      "learning_rate": 1.562774363476734e-05,
      "loss": 0.0002,
      "step": 4698
    },
    {
      "epoch": 1.3751829089844894,
      "grad_norm": 0.02124558761715889,
      "learning_rate": 1.5620427275387766e-05,
      "loss": 0.0002,
      "step": 4699
    },
    {
      "epoch": 1.3754755633596722,
      "grad_norm": 0.042467426508665085,
      "learning_rate": 1.5613110916008194e-05,
      "loss": 0.0004,
      "step": 4700
    },
    {
      "epoch": 1.3757682177348551,
      "grad_norm": 1.1788674592971802,
      "learning_rate": 1.5605794556628622e-05,
      "loss": 0.0036,
      "step": 4701
    },
    {
      "epoch": 1.376060872110038,
      "grad_norm": 0.005982064642012119,
      "learning_rate": 1.559847819724905e-05,
      "loss": 0.0001,
      "step": 4702
    },
    {
      "epoch": 1.376353526485221,
      "grad_norm": 0.003018517279997468,
      "learning_rate": 1.5591161837869478e-05,
      "loss": 0.0001,
      "step": 4703
    },
    {
      "epoch": 1.3766461808604038,
      "grad_norm": 0.0016903309151530266,
      "learning_rate": 1.5583845478489902e-05,
      "loss": 0.0,
      "step": 4704
    },
    {
      "epoch": 1.3769388352355867,
      "grad_norm": 0.0051790825091302395,
      "learning_rate": 1.557652911911033e-05,
      "loss": 0.0001,
      "step": 4705
    },
    {
      "epoch": 1.3772314896107698,
      "grad_norm": 0.0019127910491079092,
      "learning_rate": 1.556921275973076e-05,
      "loss": 0.0,
      "step": 4706
    },
    {
      "epoch": 1.3775241439859527,
      "grad_norm": 0.0031322145368903875,
      "learning_rate": 1.5561896400351186e-05,
      "loss": 0.0001,
      "step": 4707
    },
    {
      "epoch": 1.3778167983611356,
      "grad_norm": 0.004797694273293018,
      "learning_rate": 1.5554580040971614e-05,
      "loss": 0.0001,
      "step": 4708
    },
    {
      "epoch": 1.3781094527363185,
      "grad_norm": 0.0022557724732905626,
      "learning_rate": 1.554726368159204e-05,
      "loss": 0.0001,
      "step": 4709
    },
    {
      "epoch": 1.3784021071115014,
      "grad_norm": 0.0006947844522073865,
      "learning_rate": 1.5539947322212467e-05,
      "loss": 0.0,
      "step": 4710
    },
    {
      "epoch": 1.3786947614866842,
      "grad_norm": 0.003478395752608776,
      "learning_rate": 1.5532630962832895e-05,
      "loss": 0.0001,
      "step": 4711
    },
    {
      "epoch": 1.3789874158618671,
      "grad_norm": 0.00031348259653896093,
      "learning_rate": 1.5525314603453323e-05,
      "loss": 0.0,
      "step": 4712
    },
    {
      "epoch": 1.37928007023705,
      "grad_norm": 0.0031743720173835754,
      "learning_rate": 1.551799824407375e-05,
      "loss": 0.0001,
      "step": 4713
    },
    {
      "epoch": 1.379572724612233,
      "grad_norm": 0.0017899590311571956,
      "learning_rate": 1.551068188469418e-05,
      "loss": 0.0,
      "step": 4714
    },
    {
      "epoch": 1.3798653789874158,
      "grad_norm": 0.0030690422281622887,
      "learning_rate": 1.5503365525314603e-05,
      "loss": 0.0,
      "step": 4715
    },
    {
      "epoch": 1.3801580333625987,
      "grad_norm": 0.007692730985581875,
      "learning_rate": 1.549604916593503e-05,
      "loss": 0.0001,
      "step": 4716
    },
    {
      "epoch": 1.3804506877377816,
      "grad_norm": 0.04855996370315552,
      "learning_rate": 1.548873280655546e-05,
      "loss": 0.0004,
      "step": 4717
    },
    {
      "epoch": 1.3807433421129645,
      "grad_norm": 0.007183206267654896,
      "learning_rate": 1.5481416447175887e-05,
      "loss": 0.0001,
      "step": 4718
    },
    {
      "epoch": 1.3810359964881476,
      "grad_norm": 0.001024767872877419,
      "learning_rate": 1.5474100087796315e-05,
      "loss": 0.0,
      "step": 4719
    },
    {
      "epoch": 1.3813286508633305,
      "grad_norm": 0.003089958569034934,
      "learning_rate": 1.546678372841674e-05,
      "loss": 0.0001,
      "step": 4720
    },
    {
      "epoch": 1.3816213052385133,
      "grad_norm": 0.010393207892775536,
      "learning_rate": 1.5459467369037167e-05,
      "loss": 0.0001,
      "step": 4721
    },
    {
      "epoch": 1.3819139596136962,
      "grad_norm": 0.03264763206243515,
      "learning_rate": 1.5452151009657595e-05,
      "loss": 0.0003,
      "step": 4722
    },
    {
      "epoch": 1.3822066139888791,
      "grad_norm": 0.0018647081451490521,
      "learning_rate": 1.5444834650278023e-05,
      "loss": 0.0001,
      "step": 4723
    },
    {
      "epoch": 1.382499268364062,
      "grad_norm": 0.001332324231043458,
      "learning_rate": 1.543751829089845e-05,
      "loss": 0.0,
      "step": 4724
    },
    {
      "epoch": 1.382791922739245,
      "grad_norm": 0.015286540612578392,
      "learning_rate": 1.5430201931518876e-05,
      "loss": 0.0001,
      "step": 4725
    },
    {
      "epoch": 1.3830845771144278,
      "grad_norm": 0.0017781606875360012,
      "learning_rate": 1.5422885572139304e-05,
      "loss": 0.0,
      "step": 4726
    },
    {
      "epoch": 1.383377231489611,
      "grad_norm": 0.0014070915058255196,
      "learning_rate": 1.5415569212759732e-05,
      "loss": 0.0,
      "step": 4727
    },
    {
      "epoch": 1.3836698858647938,
      "grad_norm": 0.0006733147893100977,
      "learning_rate": 1.540825285338016e-05,
      "loss": 0.0,
      "step": 4728
    },
    {
      "epoch": 1.3839625402399767,
      "grad_norm": 0.001550506567582488,
      "learning_rate": 1.5400936494000588e-05,
      "loss": 0.0,
      "step": 4729
    },
    {
      "epoch": 1.3842551946151596,
      "grad_norm": 0.0058020418509840965,
      "learning_rate": 1.5393620134621016e-05,
      "loss": 0.0001,
      "step": 4730
    },
    {
      "epoch": 1.3845478489903424,
      "grad_norm": 0.005312574096024036,
      "learning_rate": 1.538630377524144e-05,
      "loss": 0.0001,
      "step": 4731
    },
    {
      "epoch": 1.3848405033655253,
      "grad_norm": 0.003337213536724448,
      "learning_rate": 1.5378987415861868e-05,
      "loss": 0.0,
      "step": 4732
    },
    {
      "epoch": 1.3851331577407082,
      "grad_norm": 0.0022060826886445284,
      "learning_rate": 1.5371671056482296e-05,
      "loss": 0.0,
      "step": 4733
    },
    {
      "epoch": 1.385425812115891,
      "grad_norm": 0.0006479641306214035,
      "learning_rate": 1.5364354697102724e-05,
      "loss": 0.0,
      "step": 4734
    },
    {
      "epoch": 1.385718466491074,
      "grad_norm": 0.0009419500129297376,
      "learning_rate": 1.5357038337723152e-05,
      "loss": 0.0,
      "step": 4735
    },
    {
      "epoch": 1.3860111208662569,
      "grad_norm": 0.0012897298438474536,
      "learning_rate": 1.5349721978343577e-05,
      "loss": 0.0,
      "step": 4736
    },
    {
      "epoch": 1.3863037752414398,
      "grad_norm": 0.0025886131916195154,
      "learning_rate": 1.5342405618964004e-05,
      "loss": 0.0001,
      "step": 4737
    },
    {
      "epoch": 1.3865964296166227,
      "grad_norm": 0.0006920943851582706,
      "learning_rate": 1.5335089259584432e-05,
      "loss": 0.0,
      "step": 4738
    },
    {
      "epoch": 1.3868890839918055,
      "grad_norm": 0.0004685150342993438,
      "learning_rate": 1.532777290020486e-05,
      "loss": 0.0,
      "step": 4739
    },
    {
      "epoch": 1.3871817383669887,
      "grad_norm": 0.000487115845317021,
      "learning_rate": 1.532045654082529e-05,
      "loss": 0.0,
      "step": 4740
    },
    {
      "epoch": 1.3874743927421715,
      "grad_norm": 0.00047260106657631695,
      "learning_rate": 1.5313140181445713e-05,
      "loss": 0.0,
      "step": 4741
    },
    {
      "epoch": 1.3877670471173544,
      "grad_norm": 0.008047452196478844,
      "learning_rate": 1.530582382206614e-05,
      "loss": 0.0001,
      "step": 4742
    },
    {
      "epoch": 1.3880597014925373,
      "grad_norm": 0.02765766903758049,
      "learning_rate": 1.529850746268657e-05,
      "loss": 0.0002,
      "step": 4743
    },
    {
      "epoch": 1.3883523558677202,
      "grad_norm": 0.0014313700376078486,
      "learning_rate": 1.5291191103306997e-05,
      "loss": 0.0,
      "step": 4744
    },
    {
      "epoch": 1.388645010242903,
      "grad_norm": 0.21554401516914368,
      "learning_rate": 1.5283874743927425e-05,
      "loss": 0.0008,
      "step": 4745
    },
    {
      "epoch": 1.388937664618086,
      "grad_norm": 0.002505704527720809,
      "learning_rate": 1.5276558384547853e-05,
      "loss": 0.0,
      "step": 4746
    },
    {
      "epoch": 1.3892303189932689,
      "grad_norm": 0.0006845356547273695,
      "learning_rate": 1.5269242025168277e-05,
      "loss": 0.0,
      "step": 4747
    },
    {
      "epoch": 1.389522973368452,
      "grad_norm": 0.0009476763661950827,
      "learning_rate": 1.5261925665788705e-05,
      "loss": 0.0,
      "step": 4748
    },
    {
      "epoch": 1.3898156277436349,
      "grad_norm": 0.019107108935713768,
      "learning_rate": 1.5254609306409131e-05,
      "loss": 0.0002,
      "step": 4749
    },
    {
      "epoch": 1.3901082821188178,
      "grad_norm": 0.0009891375666484237,
      "learning_rate": 1.524729294702956e-05,
      "loss": 0.0,
      "step": 4750
    },
    {
      "epoch": 1.3904009364940006,
      "grad_norm": 0.0008806857513263822,
      "learning_rate": 1.5239976587649987e-05,
      "loss": 0.0,
      "step": 4751
    },
    {
      "epoch": 1.3906935908691835,
      "grad_norm": 0.0010386556386947632,
      "learning_rate": 1.5232660228270412e-05,
      "loss": 0.0,
      "step": 4752
    },
    {
      "epoch": 1.3909862452443664,
      "grad_norm": 0.001391564728692174,
      "learning_rate": 1.522534386889084e-05,
      "loss": 0.0,
      "step": 4753
    },
    {
      "epoch": 1.3912788996195493,
      "grad_norm": 0.00067763717379421,
      "learning_rate": 1.5218027509511268e-05,
      "loss": 0.0,
      "step": 4754
    },
    {
      "epoch": 1.3915715539947322,
      "grad_norm": 0.000680471770465374,
      "learning_rate": 1.5210711150131696e-05,
      "loss": 0.0,
      "step": 4755
    },
    {
      "epoch": 1.391864208369915,
      "grad_norm": 0.001424195826984942,
      "learning_rate": 1.5203394790752124e-05,
      "loss": 0.0,
      "step": 4756
    },
    {
      "epoch": 1.392156862745098,
      "grad_norm": 0.0004938715137541294,
      "learning_rate": 1.5196078431372548e-05,
      "loss": 0.0,
      "step": 4757
    },
    {
      "epoch": 1.3924495171202809,
      "grad_norm": 0.0009264610707759857,
      "learning_rate": 1.5188762071992976e-05,
      "loss": 0.0,
      "step": 4758
    },
    {
      "epoch": 1.3927421714954638,
      "grad_norm": 0.024234339594841003,
      "learning_rate": 1.5181445712613404e-05,
      "loss": 0.0002,
      "step": 4759
    },
    {
      "epoch": 1.3930348258706466,
      "grad_norm": 0.0010981353698298335,
      "learning_rate": 1.5174129353233832e-05,
      "loss": 0.0,
      "step": 4760
    },
    {
      "epoch": 1.3933274802458298,
      "grad_norm": 0.0005907249869778752,
      "learning_rate": 1.516681299385426e-05,
      "loss": 0.0,
      "step": 4761
    },
    {
      "epoch": 1.3936201346210126,
      "grad_norm": 25.285842895507812,
      "learning_rate": 1.5159496634474688e-05,
      "loss": 0.0793,
      "step": 4762
    },
    {
      "epoch": 1.3939127889961955,
      "grad_norm": 0.0005344810779206455,
      "learning_rate": 1.5152180275095113e-05,
      "loss": 0.0,
      "step": 4763
    },
    {
      "epoch": 1.3942054433713784,
      "grad_norm": 0.003299710340797901,
      "learning_rate": 1.514486391571554e-05,
      "loss": 0.0001,
      "step": 4764
    },
    {
      "epoch": 1.3944980977465613,
      "grad_norm": 0.0008219130686484277,
      "learning_rate": 1.5137547556335968e-05,
      "loss": 0.0,
      "step": 4765
    },
    {
      "epoch": 1.3947907521217442,
      "grad_norm": 0.013707981444895267,
      "learning_rate": 1.5130231196956396e-05,
      "loss": 0.0001,
      "step": 4766
    },
    {
      "epoch": 1.395083406496927,
      "grad_norm": 0.0005613876855932176,
      "learning_rate": 1.5122914837576824e-05,
      "loss": 0.0,
      "step": 4767
    },
    {
      "epoch": 1.39537606087211,
      "grad_norm": 0.0008589632343500853,
      "learning_rate": 1.5115598478197249e-05,
      "loss": 0.0,
      "step": 4768
    },
    {
      "epoch": 1.395668715247293,
      "grad_norm": 0.0007482717046514153,
      "learning_rate": 1.5108282118817677e-05,
      "loss": 0.0,
      "step": 4769
    },
    {
      "epoch": 1.395961369622476,
      "grad_norm": 0.0007141092792153358,
      "learning_rate": 1.5100965759438105e-05,
      "loss": 0.0,
      "step": 4770
    },
    {
      "epoch": 1.3962540239976589,
      "grad_norm": 0.0016803006874397397,
      "learning_rate": 1.5093649400058533e-05,
      "loss": 0.0,
      "step": 4771
    },
    {
      "epoch": 1.3965466783728417,
      "grad_norm": 0.0020187615882605314,
      "learning_rate": 1.508633304067896e-05,
      "loss": 0.0,
      "step": 4772
    },
    {
      "epoch": 1.3968393327480246,
      "grad_norm": 0.00683267368003726,
      "learning_rate": 1.5079016681299385e-05,
      "loss": 0.0001,
      "step": 4773
    },
    {
      "epoch": 1.3971319871232075,
      "grad_norm": 0.00163374247495085,
      "learning_rate": 1.5071700321919813e-05,
      "loss": 0.0,
      "step": 4774
    },
    {
      "epoch": 1.3974246414983904,
      "grad_norm": 0.001296279369853437,
      "learning_rate": 1.5064383962540241e-05,
      "loss": 0.0,
      "step": 4775
    },
    {
      "epoch": 1.3977172958735733,
      "grad_norm": 0.013661934062838554,
      "learning_rate": 1.5057067603160669e-05,
      "loss": 0.0002,
      "step": 4776
    },
    {
      "epoch": 1.3980099502487562,
      "grad_norm": 0.053470246493816376,
      "learning_rate": 1.5049751243781095e-05,
      "loss": 0.0002,
      "step": 4777
    },
    {
      "epoch": 1.398302604623939,
      "grad_norm": 0.0011320695048198104,
      "learning_rate": 1.5042434884401522e-05,
      "loss": 0.0,
      "step": 4778
    },
    {
      "epoch": 1.398595258999122,
      "grad_norm": 0.0019740501884371042,
      "learning_rate": 1.503511852502195e-05,
      "loss": 0.0,
      "step": 4779
    },
    {
      "epoch": 1.3988879133743048,
      "grad_norm": 0.0005951537750661373,
      "learning_rate": 1.5027802165642378e-05,
      "loss": 0.0,
      "step": 4780
    },
    {
      "epoch": 1.3991805677494877,
      "grad_norm": 0.0020884210243821144,
      "learning_rate": 1.5020485806262804e-05,
      "loss": 0.0,
      "step": 4781
    },
    {
      "epoch": 1.3994732221246708,
      "grad_norm": 0.0016150816809386015,
      "learning_rate": 1.5013169446883232e-05,
      "loss": 0.0,
      "step": 4782
    },
    {
      "epoch": 1.3997658764998537,
      "grad_norm": 0.0014983267756178975,
      "learning_rate": 1.500585308750366e-05,
      "loss": 0.0,
      "step": 4783
    },
    {
      "epoch": 1.4000585308750366,
      "grad_norm": 0.004859476815909147,
      "learning_rate": 1.4998536728124086e-05,
      "loss": 0.0001,
      "step": 4784
    },
    {
      "epoch": 1.4003511852502195,
      "grad_norm": 0.0011176099069416523,
      "learning_rate": 1.4991220368744512e-05,
      "loss": 0.0,
      "step": 4785
    },
    {
      "epoch": 1.4006438396254024,
      "grad_norm": 0.003072738880291581,
      "learning_rate": 1.498390400936494e-05,
      "loss": 0.0001,
      "step": 4786
    },
    {
      "epoch": 1.4009364940005853,
      "grad_norm": 0.0038070150185376406,
      "learning_rate": 1.4976587649985368e-05,
      "loss": 0.0001,
      "step": 4787
    },
    {
      "epoch": 1.4012291483757682,
      "grad_norm": 0.0016997898928821087,
      "learning_rate": 1.4969271290605796e-05,
      "loss": 0.0,
      "step": 4788
    },
    {
      "epoch": 1.401521802750951,
      "grad_norm": 0.0010296344989910722,
      "learning_rate": 1.496195493122622e-05,
      "loss": 0.0,
      "step": 4789
    },
    {
      "epoch": 1.4018144571261342,
      "grad_norm": 0.00441362801939249,
      "learning_rate": 1.4954638571846649e-05,
      "loss": 0.0001,
      "step": 4790
    },
    {
      "epoch": 1.402107111501317,
      "grad_norm": 0.0014113447396084666,
      "learning_rate": 1.4947322212467077e-05,
      "loss": 0.0,
      "step": 4791
    },
    {
      "epoch": 1.4023997658765,
      "grad_norm": 0.008393810130655766,
      "learning_rate": 1.4940005853087504e-05,
      "loss": 0.0001,
      "step": 4792
    },
    {
      "epoch": 1.4026924202516828,
      "grad_norm": 10.538448333740234,
      "learning_rate": 1.4932689493707932e-05,
      "loss": 0.0358,
      "step": 4793
    },
    {
      "epoch": 1.4029850746268657,
      "grad_norm": 0.0042242067866027355,
      "learning_rate": 1.4925373134328357e-05,
      "loss": 0.0001,
      "step": 4794
    },
    {
      "epoch": 1.4032777290020486,
      "grad_norm": 0.0030644184444099665,
      "learning_rate": 1.4918056774948785e-05,
      "loss": 0.0,
      "step": 4795
    },
    {
      "epoch": 1.4035703833772315,
      "grad_norm": 0.0134981544688344,
      "learning_rate": 1.4910740415569213e-05,
      "loss": 0.0001,
      "step": 4796
    },
    {
      "epoch": 1.4038630377524144,
      "grad_norm": 0.0011584991589188576,
      "learning_rate": 1.4903424056189641e-05,
      "loss": 0.0,
      "step": 4797
    },
    {
      "epoch": 1.4041556921275973,
      "grad_norm": 0.0006236496847122908,
      "learning_rate": 1.4896107696810069e-05,
      "loss": 0.0,
      "step": 4798
    },
    {
      "epoch": 1.4044483465027802,
      "grad_norm": 0.0005023694830015302,
      "learning_rate": 1.4888791337430497e-05,
      "loss": 0.0,
      "step": 4799
    },
    {
      "epoch": 1.404741000877963,
      "grad_norm": 0.0009039943688549101,
      "learning_rate": 1.4881474978050921e-05,
      "loss": 0.0,
      "step": 4800
    },
    {
      "epoch": 1.405033655253146,
      "grad_norm": 0.0020111447665840387,
      "learning_rate": 1.487415861867135e-05,
      "loss": 0.0,
      "step": 4801
    },
    {
      "epoch": 1.4053263096283288,
      "grad_norm": 0.001803457853384316,
      "learning_rate": 1.4866842259291777e-05,
      "loss": 0.0,
      "step": 4802
    },
    {
      "epoch": 1.4056189640035117,
      "grad_norm": 0.0007646935409866273,
      "learning_rate": 1.4859525899912205e-05,
      "loss": 0.0,
      "step": 4803
    },
    {
      "epoch": 1.4059116183786948,
      "grad_norm": 0.0012778372038155794,
      "learning_rate": 1.4852209540532633e-05,
      "loss": 0.0,
      "step": 4804
    },
    {
      "epoch": 1.4062042727538777,
      "grad_norm": 0.0005517517565749586,
      "learning_rate": 1.4844893181153058e-05,
      "loss": 0.0,
      "step": 4805
    },
    {
      "epoch": 1.4064969271290606,
      "grad_norm": 0.004605107940733433,
      "learning_rate": 1.4837576821773486e-05,
      "loss": 0.0001,
      "step": 4806
    },
    {
      "epoch": 1.4067895815042435,
      "grad_norm": 0.0006094065029174089,
      "learning_rate": 1.4830260462393914e-05,
      "loss": 0.0,
      "step": 4807
    },
    {
      "epoch": 1.4070822358794264,
      "grad_norm": 0.0008657328435219824,
      "learning_rate": 1.4822944103014342e-05,
      "loss": 0.0,
      "step": 4808
    },
    {
      "epoch": 1.4073748902546093,
      "grad_norm": 0.002067999681457877,
      "learning_rate": 1.481562774363477e-05,
      "loss": 0.0,
      "step": 4809
    },
    {
      "epoch": 1.4076675446297922,
      "grad_norm": 0.0011860699160024524,
      "learning_rate": 1.4808311384255194e-05,
      "loss": 0.0,
      "step": 4810
    },
    {
      "epoch": 1.407960199004975,
      "grad_norm": 0.00043056573485955596,
      "learning_rate": 1.4800995024875622e-05,
      "loss": 0.0,
      "step": 4811
    },
    {
      "epoch": 1.4082528533801582,
      "grad_norm": 0.0012745216954499483,
      "learning_rate": 1.479367866549605e-05,
      "loss": 0.0,
      "step": 4812
    },
    {
      "epoch": 1.408545507755341,
      "grad_norm": 0.0004913328448310494,
      "learning_rate": 1.4786362306116478e-05,
      "loss": 0.0,
      "step": 4813
    },
    {
      "epoch": 1.408838162130524,
      "grad_norm": 0.0008683112100698054,
      "learning_rate": 1.4779045946736906e-05,
      "loss": 0.0,
      "step": 4814
    },
    {
      "epoch": 1.4091308165057068,
      "grad_norm": 0.0005154652753844857,
      "learning_rate": 1.4771729587357334e-05,
      "loss": 0.0,
      "step": 4815
    },
    {
      "epoch": 1.4094234708808897,
      "grad_norm": 0.0005342514486983418,
      "learning_rate": 1.4764413227977758e-05,
      "loss": 0.0,
      "step": 4816
    },
    {
      "epoch": 1.4097161252560726,
      "grad_norm": 0.0017309903632849455,
      "learning_rate": 1.4757096868598186e-05,
      "loss": 0.0,
      "step": 4817
    },
    {
      "epoch": 1.4100087796312555,
      "grad_norm": 0.0008248227532021701,
      "learning_rate": 1.4749780509218614e-05,
      "loss": 0.0,
      "step": 4818
    },
    {
      "epoch": 1.4103014340064384,
      "grad_norm": 0.000505580217577517,
      "learning_rate": 1.4742464149839042e-05,
      "loss": 0.0,
      "step": 4819
    },
    {
      "epoch": 1.4105940883816213,
      "grad_norm": 0.0016366797499358654,
      "learning_rate": 1.4735147790459468e-05,
      "loss": 0.0,
      "step": 4820
    },
    {
      "epoch": 1.4108867427568041,
      "grad_norm": 0.0010418167803436518,
      "learning_rate": 1.4727831431079895e-05,
      "loss": 0.0,
      "step": 4821
    },
    {
      "epoch": 1.411179397131987,
      "grad_norm": 0.0002579023130238056,
      "learning_rate": 1.4720515071700323e-05,
      "loss": 0.0,
      "step": 4822
    },
    {
      "epoch": 1.41147205150717,
      "grad_norm": 0.00028388010105118155,
      "learning_rate": 1.471319871232075e-05,
      "loss": 0.0,
      "step": 4823
    },
    {
      "epoch": 1.4117647058823528,
      "grad_norm": 0.0003754272765945643,
      "learning_rate": 1.4705882352941177e-05,
      "loss": 0.0,
      "step": 4824
    },
    {
      "epoch": 1.412057360257536,
      "grad_norm": 0.007686973083764315,
      "learning_rate": 1.4698565993561605e-05,
      "loss": 0.0001,
      "step": 4825
    },
    {
      "epoch": 1.4123500146327188,
      "grad_norm": 0.0014296626904979348,
      "learning_rate": 1.4691249634182031e-05,
      "loss": 0.0,
      "step": 4826
    },
    {
      "epoch": 1.4126426690079017,
      "grad_norm": 0.000988753279671073,
      "learning_rate": 1.4683933274802459e-05,
      "loss": 0.0,
      "step": 4827
    },
    {
      "epoch": 1.4129353233830846,
      "grad_norm": 0.005212455987930298,
      "learning_rate": 1.4676616915422885e-05,
      "loss": 0.0001,
      "step": 4828
    },
    {
      "epoch": 1.4132279777582675,
      "grad_norm": 0.0006819405243732035,
      "learning_rate": 1.4669300556043313e-05,
      "loss": 0.0,
      "step": 4829
    },
    {
      "epoch": 1.4135206321334504,
      "grad_norm": 0.0005956888780929148,
      "learning_rate": 1.4661984196663741e-05,
      "loss": 0.0,
      "step": 4830
    },
    {
      "epoch": 1.4138132865086332,
      "grad_norm": 0.0010589334415271878,
      "learning_rate": 1.4654667837284169e-05,
      "loss": 0.0,
      "step": 4831
    },
    {
      "epoch": 1.4141059408838161,
      "grad_norm": 0.0024124777410179377,
      "learning_rate": 1.4647351477904594e-05,
      "loss": 0.0,
      "step": 4832
    },
    {
      "epoch": 1.4143985952589992,
      "grad_norm": 0.0024167487863451242,
      "learning_rate": 1.4640035118525022e-05,
      "loss": 0.0,
      "step": 4833
    },
    {
      "epoch": 1.4146912496341821,
      "grad_norm": 0.001746480236761272,
      "learning_rate": 1.463271875914545e-05,
      "loss": 0.0,
      "step": 4834
    },
    {
      "epoch": 1.414983904009365,
      "grad_norm": 0.0018156811129301786,
      "learning_rate": 1.4625402399765878e-05,
      "loss": 0.0,
      "step": 4835
    },
    {
      "epoch": 1.415276558384548,
      "grad_norm": 0.0006777496309950948,
      "learning_rate": 1.4618086040386306e-05,
      "loss": 0.0,
      "step": 4836
    },
    {
      "epoch": 1.4155692127597308,
      "grad_norm": 0.0018828504253178835,
      "learning_rate": 1.461076968100673e-05,
      "loss": 0.0,
      "step": 4837
    },
    {
      "epoch": 1.4158618671349137,
      "grad_norm": 0.0006251346203498542,
      "learning_rate": 1.4603453321627158e-05,
      "loss": 0.0,
      "step": 4838
    },
    {
      "epoch": 1.4161545215100966,
      "grad_norm": 0.0006089691305533051,
      "learning_rate": 1.4596136962247586e-05,
      "loss": 0.0,
      "step": 4839
    },
    {
      "epoch": 1.4164471758852795,
      "grad_norm": 0.0014087685849517584,
      "learning_rate": 1.4588820602868014e-05,
      "loss": 0.0,
      "step": 4840
    },
    {
      "epoch": 1.4167398302604624,
      "grad_norm": 0.0006932232645340264,
      "learning_rate": 1.4581504243488442e-05,
      "loss": 0.0,
      "step": 4841
    },
    {
      "epoch": 1.4170324846356452,
      "grad_norm": 0.0017885544802993536,
      "learning_rate": 1.4574187884108866e-05,
      "loss": 0.0,
      "step": 4842
    },
    {
      "epoch": 1.4173251390108281,
      "grad_norm": 0.00011074745270889252,
      "learning_rate": 1.4566871524729294e-05,
      "loss": 0.0,
      "step": 4843
    },
    {
      "epoch": 1.417617793386011,
      "grad_norm": 0.0006461992743425071,
      "learning_rate": 1.4559555165349722e-05,
      "loss": 0.0,
      "step": 4844
    },
    {
      "epoch": 1.417910447761194,
      "grad_norm": 0.002514176769182086,
      "learning_rate": 1.455223880597015e-05,
      "loss": 0.0001,
      "step": 4845
    },
    {
      "epoch": 1.418203102136377,
      "grad_norm": 0.0006553163984790444,
      "learning_rate": 1.4544922446590578e-05,
      "loss": 0.0,
      "step": 4846
    },
    {
      "epoch": 1.41849575651156,
      "grad_norm": 0.0013165060663595796,
      "learning_rate": 1.4537606087211003e-05,
      "loss": 0.0,
      "step": 4847
    },
    {
      "epoch": 1.4187884108867428,
      "grad_norm": 0.0023327763192355633,
      "learning_rate": 1.453028972783143e-05,
      "loss": 0.0,
      "step": 4848
    },
    {
      "epoch": 1.4190810652619257,
      "grad_norm": 0.0013811510289087892,
      "learning_rate": 1.4522973368451859e-05,
      "loss": 0.0,
      "step": 4849
    },
    {
      "epoch": 1.4193737196371086,
      "grad_norm": 0.00045944369048811495,
      "learning_rate": 1.4515657009072287e-05,
      "loss": 0.0,
      "step": 4850
    },
    {
      "epoch": 1.4196663740122915,
      "grad_norm": 0.0009314783965237439,
      "learning_rate": 1.4508340649692715e-05,
      "loss": 0.0,
      "step": 4851
    },
    {
      "epoch": 1.4199590283874743,
      "grad_norm": 0.00024277539341710508,
      "learning_rate": 1.4501024290313143e-05,
      "loss": 0.0,
      "step": 4852
    },
    {
      "epoch": 1.4202516827626572,
      "grad_norm": 0.0004942430532537401,
      "learning_rate": 1.4493707930933567e-05,
      "loss": 0.0,
      "step": 4853
    },
    {
      "epoch": 1.4205443371378403,
      "grad_norm": 0.0028814657125622034,
      "learning_rate": 1.4486391571553995e-05,
      "loss": 0.0,
      "step": 4854
    },
    {
      "epoch": 1.4208369915130232,
      "grad_norm": 0.001684304908849299,
      "learning_rate": 1.4479075212174423e-05,
      "loss": 0.0,
      "step": 4855
    },
    {
      "epoch": 1.4211296458882061,
      "grad_norm": 0.0008666531648486853,
      "learning_rate": 1.4471758852794851e-05,
      "loss": 0.0,
      "step": 4856
    },
    {
      "epoch": 1.421422300263389,
      "grad_norm": 0.0023896319326013327,
      "learning_rate": 1.4464442493415279e-05,
      "loss": 0.0,
      "step": 4857
    },
    {
      "epoch": 1.421714954638572,
      "grad_norm": 0.0006379640544764698,
      "learning_rate": 1.4457126134035703e-05,
      "loss": 0.0,
      "step": 4858
    },
    {
      "epoch": 1.4220076090137548,
      "grad_norm": 0.0034662364050745964,
      "learning_rate": 1.4449809774656131e-05,
      "loss": 0.0001,
      "step": 4859
    },
    {
      "epoch": 1.4223002633889377,
      "grad_norm": 0.40443992614746094,
      "learning_rate": 1.444249341527656e-05,
      "loss": 0.0012,
      "step": 4860
    },
    {
      "epoch": 1.4225929177641206,
      "grad_norm": 0.0009623516816645861,
      "learning_rate": 1.4435177055896987e-05,
      "loss": 0.0,
      "step": 4861
    },
    {
      "epoch": 1.4228855721393034,
      "grad_norm": 0.004101148806512356,
      "learning_rate": 1.4427860696517415e-05,
      "loss": 0.0,
      "step": 4862
    },
    {
      "epoch": 1.4231782265144863,
      "grad_norm": 0.0006218489143066108,
      "learning_rate": 1.442054433713784e-05,
      "loss": 0.0,
      "step": 4863
    },
    {
      "epoch": 1.4234708808896692,
      "grad_norm": 0.0009097387082874775,
      "learning_rate": 1.4413227977758268e-05,
      "loss": 0.0,
      "step": 4864
    },
    {
      "epoch": 1.423763535264852,
      "grad_norm": 0.0008629619260318577,
      "learning_rate": 1.4405911618378696e-05,
      "loss": 0.0,
      "step": 4865
    },
    {
      "epoch": 1.424056189640035,
      "grad_norm": 0.0007062812801450491,
      "learning_rate": 1.4398595258999124e-05,
      "loss": 0.0,
      "step": 4866
    },
    {
      "epoch": 1.424348844015218,
      "grad_norm": 0.0006604838999919593,
      "learning_rate": 1.439127889961955e-05,
      "loss": 0.0,
      "step": 4867
    },
    {
      "epoch": 1.424641498390401,
      "grad_norm": 0.0003423292946536094,
      "learning_rate": 1.4383962540239978e-05,
      "loss": 0.0,
      "step": 4868
    },
    {
      "epoch": 1.4249341527655839,
      "grad_norm": 0.0016571956221014261,
      "learning_rate": 1.4376646180860404e-05,
      "loss": 0.0,
      "step": 4869
    },
    {
      "epoch": 1.4252268071407668,
      "grad_norm": 0.0017341498751193285,
      "learning_rate": 1.4369329821480832e-05,
      "loss": 0.0,
      "step": 4870
    },
    {
      "epoch": 1.4255194615159497,
      "grad_norm": 0.0006102113402448595,
      "learning_rate": 1.4362013462101258e-05,
      "loss": 0.0,
      "step": 4871
    },
    {
      "epoch": 1.4258121158911325,
      "grad_norm": 0.0016994901234284043,
      "learning_rate": 1.4354697102721686e-05,
      "loss": 0.0,
      "step": 4872
    },
    {
      "epoch": 1.4261047702663154,
      "grad_norm": 0.0011881024111062288,
      "learning_rate": 1.4347380743342114e-05,
      "loss": 0.0,
      "step": 4873
    },
    {
      "epoch": 1.4263974246414983,
      "grad_norm": 0.0010435834992676973,
      "learning_rate": 1.434006438396254e-05,
      "loss": 0.0,
      "step": 4874
    },
    {
      "epoch": 1.4266900790166814,
      "grad_norm": 0.007706243544816971,
      "learning_rate": 1.4332748024582968e-05,
      "loss": 0.0001,
      "step": 4875
    },
    {
      "epoch": 1.4269827333918643,
      "grad_norm": 0.0002755485475063324,
      "learning_rate": 1.4325431665203395e-05,
      "loss": 0.0,
      "step": 4876
    },
    {
      "epoch": 1.4272753877670472,
      "grad_norm": 0.004249555990099907,
      "learning_rate": 1.4318115305823823e-05,
      "loss": 0.0001,
      "step": 4877
    },
    {
      "epoch": 1.42756804214223,
      "grad_norm": 0.000787175667937845,
      "learning_rate": 1.431079894644425e-05,
      "loss": 0.0,
      "step": 4878
    },
    {
      "epoch": 1.427860696517413,
      "grad_norm": 0.0003150795819237828,
      "learning_rate": 1.4303482587064677e-05,
      "loss": 0.0,
      "step": 4879
    },
    {
      "epoch": 1.4281533508925959,
      "grad_norm": 0.0005350884748622775,
      "learning_rate": 1.4296166227685103e-05,
      "loss": 0.0,
      "step": 4880
    },
    {
      "epoch": 1.4284460052677788,
      "grad_norm": 0.006955491844564676,
      "learning_rate": 1.4288849868305531e-05,
      "loss": 0.0001,
      "step": 4881
    },
    {
      "epoch": 1.4287386596429617,
      "grad_norm": 0.015475178137421608,
      "learning_rate": 1.4281533508925959e-05,
      "loss": 0.0001,
      "step": 4882
    },
    {
      "epoch": 1.4290313140181445,
      "grad_norm": 0.0012212616857141256,
      "learning_rate": 1.4274217149546387e-05,
      "loss": 0.0,
      "step": 4883
    },
    {
      "epoch": 1.4293239683933274,
      "grad_norm": 0.0003490689559839666,
      "learning_rate": 1.4266900790166815e-05,
      "loss": 0.0,
      "step": 4884
    },
    {
      "epoch": 1.4296166227685103,
      "grad_norm": 0.00045466903247870505,
      "learning_rate": 1.425958443078724e-05,
      "loss": 0.0,
      "step": 4885
    },
    {
      "epoch": 1.4299092771436932,
      "grad_norm": 0.0018789108144119382,
      "learning_rate": 1.4252268071407667e-05,
      "loss": 0.0,
      "step": 4886
    },
    {
      "epoch": 1.430201931518876,
      "grad_norm": 0.0007043906371109188,
      "learning_rate": 1.4244951712028095e-05,
      "loss": 0.0,
      "step": 4887
    },
    {
      "epoch": 1.430494585894059,
      "grad_norm": 0.0014209231594577432,
      "learning_rate": 1.4237635352648523e-05,
      "loss": 0.0,
      "step": 4888
    },
    {
      "epoch": 1.430787240269242,
      "grad_norm": 0.00036013106000609696,
      "learning_rate": 1.4230318993268951e-05,
      "loss": 0.0,
      "step": 4889
    },
    {
      "epoch": 1.431079894644425,
      "grad_norm": 0.000885677698533982,
      "learning_rate": 1.4223002633889376e-05,
      "loss": 0.0,
      "step": 4890
    },
    {
      "epoch": 1.4313725490196079,
      "grad_norm": 0.0004048089904244989,
      "learning_rate": 1.4215686274509804e-05,
      "loss": 0.0,
      "step": 4891
    },
    {
      "epoch": 1.4316652033947908,
      "grad_norm": 0.000629786285571754,
      "learning_rate": 1.4208369915130232e-05,
      "loss": 0.0,
      "step": 4892
    },
    {
      "epoch": 1.4319578577699736,
      "grad_norm": 0.0004508458368945867,
      "learning_rate": 1.420105355575066e-05,
      "loss": 0.0,
      "step": 4893
    },
    {
      "epoch": 1.4322505121451565,
      "grad_norm": 0.0013702167198061943,
      "learning_rate": 1.4193737196371088e-05,
      "loss": 0.0,
      "step": 4894
    },
    {
      "epoch": 1.4325431665203394,
      "grad_norm": 0.00038688714266754687,
      "learning_rate": 1.4186420836991512e-05,
      "loss": 0.0,
      "step": 4895
    },
    {
      "epoch": 1.4328358208955223,
      "grad_norm": 0.0003965978976339102,
      "learning_rate": 1.417910447761194e-05,
      "loss": 0.0,
      "step": 4896
    },
    {
      "epoch": 1.4331284752707054,
      "grad_norm": 0.001525850617326796,
      "learning_rate": 1.4171788118232368e-05,
      "loss": 0.0,
      "step": 4897
    },
    {
      "epoch": 1.4334211296458883,
      "grad_norm": 0.00031529253465123475,
      "learning_rate": 1.4164471758852796e-05,
      "loss": 0.0,
      "step": 4898
    },
    {
      "epoch": 1.4337137840210712,
      "grad_norm": 0.0014511430636048317,
      "learning_rate": 1.4157155399473224e-05,
      "loss": 0.0,
      "step": 4899
    },
    {
      "epoch": 1.434006438396254,
      "grad_norm": 0.003197466256096959,
      "learning_rate": 1.4149839040093649e-05,
      "loss": 0.0,
      "step": 4900
    },
    {
      "epoch": 1.434299092771437,
      "grad_norm": 0.00047766315401531756,
      "learning_rate": 1.4142522680714077e-05,
      "loss": 0.0,
      "step": 4901
    },
    {
      "epoch": 1.4345917471466199,
      "grad_norm": 0.00045633772970177233,
      "learning_rate": 1.4135206321334504e-05,
      "loss": 0.0,
      "step": 4902
    },
    {
      "epoch": 1.4348844015218027,
      "grad_norm": 0.0004014969163108617,
      "learning_rate": 1.4127889961954932e-05,
      "loss": 0.0,
      "step": 4903
    },
    {
      "epoch": 1.4351770558969856,
      "grad_norm": 0.0007565372507087886,
      "learning_rate": 1.412057360257536e-05,
      "loss": 0.0,
      "step": 4904
    },
    {
      "epoch": 1.4354697102721685,
      "grad_norm": 0.0011360982898622751,
      "learning_rate": 1.4113257243195788e-05,
      "loss": 0.0,
      "step": 4905
    },
    {
      "epoch": 1.4357623646473514,
      "grad_norm": 0.0017992169596254826,
      "learning_rate": 1.4105940883816213e-05,
      "loss": 0.0,
      "step": 4906
    },
    {
      "epoch": 1.4360550190225343,
      "grad_norm": 0.00038683050661347806,
      "learning_rate": 1.409862452443664e-05,
      "loss": 0.0,
      "step": 4907
    },
    {
      "epoch": 1.4363476733977172,
      "grad_norm": 0.001485635293647647,
      "learning_rate": 1.4091308165057069e-05,
      "loss": 0.0,
      "step": 4908
    },
    {
      "epoch": 1.4366403277729,
      "grad_norm": 0.0007270164205692708,
      "learning_rate": 1.4083991805677497e-05,
      "loss": 0.0,
      "step": 4909
    },
    {
      "epoch": 1.4369329821480832,
      "grad_norm": 0.0008282930357381701,
      "learning_rate": 1.4076675446297925e-05,
      "loss": 0.0,
      "step": 4910
    },
    {
      "epoch": 1.437225636523266,
      "grad_norm": 17.508834838867188,
      "learning_rate": 1.406935908691835e-05,
      "loss": 0.0674,
      "step": 4911
    },
    {
      "epoch": 1.437518290898449,
      "grad_norm": 0.0011270351242274046,
      "learning_rate": 1.4062042727538777e-05,
      "loss": 0.0,
      "step": 4912
    },
    {
      "epoch": 1.4378109452736318,
      "grad_norm": 0.0009964543860405684,
      "learning_rate": 1.4054726368159205e-05,
      "loss": 0.0,
      "step": 4913
    },
    {
      "epoch": 1.4381035996488147,
      "grad_norm": 0.0013905660016462207,
      "learning_rate": 1.4047410008779633e-05,
      "loss": 0.0,
      "step": 4914
    },
    {
      "epoch": 1.4383962540239976,
      "grad_norm": 0.00012497082934714854,
      "learning_rate": 1.404009364940006e-05,
      "loss": 0.0,
      "step": 4915
    },
    {
      "epoch": 1.4386889083991805,
      "grad_norm": 0.19590729475021362,
      "learning_rate": 1.4032777290020486e-05,
      "loss": 0.0005,
      "step": 4916
    },
    {
      "epoch": 1.4389815627743634,
      "grad_norm": 0.0005345430108718574,
      "learning_rate": 1.4025460930640914e-05,
      "loss": 0.0,
      "step": 4917
    },
    {
      "epoch": 1.4392742171495465,
      "grad_norm": 0.0005475014913827181,
      "learning_rate": 1.4018144571261342e-05,
      "loss": 0.0,
      "step": 4918
    },
    {
      "epoch": 1.4395668715247294,
      "grad_norm": 0.0006744549027644098,
      "learning_rate": 1.4010828211881768e-05,
      "loss": 0.0,
      "step": 4919
    },
    {
      "epoch": 1.4398595258999123,
      "grad_norm": 0.0010098711354658008,
      "learning_rate": 1.4003511852502196e-05,
      "loss": 0.0,
      "step": 4920
    },
    {
      "epoch": 1.4401521802750952,
      "grad_norm": 0.00028649295563809574,
      "learning_rate": 1.3996195493122624e-05,
      "loss": 0.0,
      "step": 4921
    },
    {
      "epoch": 1.440444834650278,
      "grad_norm": 5.341841220855713,
      "learning_rate": 1.398887913374305e-05,
      "loss": 0.0083,
      "step": 4922
    },
    {
      "epoch": 1.440737489025461,
      "grad_norm": 0.0038020676001906395,
      "learning_rate": 1.3981562774363476e-05,
      "loss": 0.0001,
      "step": 4923
    },
    {
      "epoch": 1.4410301434006438,
      "grad_norm": 0.0007207989692687988,
      "learning_rate": 1.3974246414983904e-05,
      "loss": 0.0,
      "step": 4924
    },
    {
      "epoch": 1.4413227977758267,
      "grad_norm": 0.0017718099988996983,
      "learning_rate": 1.3966930055604332e-05,
      "loss": 0.0,
      "step": 4925
    },
    {
      "epoch": 1.4416154521510096,
      "grad_norm": 0.0026354538276791573,
      "learning_rate": 1.395961369622476e-05,
      "loss": 0.0,
      "step": 4926
    },
    {
      "epoch": 1.4419081065261925,
      "grad_norm": 0.0006366702727973461,
      "learning_rate": 1.3952297336845185e-05,
      "loss": 0.0,
      "step": 4927
    },
    {
      "epoch": 1.4422007609013754,
      "grad_norm": 0.0007797402213327587,
      "learning_rate": 1.3944980977465613e-05,
      "loss": 0.0,
      "step": 4928
    },
    {
      "epoch": 1.4424934152765583,
      "grad_norm": 0.0004057021869812161,
      "learning_rate": 1.393766461808604e-05,
      "loss": 0.0,
      "step": 4929
    },
    {
      "epoch": 1.4427860696517412,
      "grad_norm": 0.0004022572538815439,
      "learning_rate": 1.3930348258706468e-05,
      "loss": 0.0,
      "step": 4930
    },
    {
      "epoch": 1.4430787240269243,
      "grad_norm": 0.0011776091996580362,
      "learning_rate": 1.3923031899326896e-05,
      "loss": 0.0,
      "step": 4931
    },
    {
      "epoch": 1.4433713784021072,
      "grad_norm": 0.0002888218150474131,
      "learning_rate": 1.3915715539947321e-05,
      "loss": 0.0,
      "step": 4932
    },
    {
      "epoch": 1.44366403277729,
      "grad_norm": 0.0005171523662284017,
      "learning_rate": 1.3908399180567749e-05,
      "loss": 0.0,
      "step": 4933
    },
    {
      "epoch": 1.443956687152473,
      "grad_norm": 0.0003763338609132916,
      "learning_rate": 1.3901082821188177e-05,
      "loss": 0.0,
      "step": 4934
    },
    {
      "epoch": 1.4442493415276558,
      "grad_norm": 0.0005641872994601727,
      "learning_rate": 1.3893766461808605e-05,
      "loss": 0.0,
      "step": 4935
    },
    {
      "epoch": 1.4445419959028387,
      "grad_norm": 0.00030219266773201525,
      "learning_rate": 1.3886450102429033e-05,
      "loss": 0.0,
      "step": 4936
    },
    {
      "epoch": 1.4448346502780216,
      "grad_norm": 0.000511305988766253,
      "learning_rate": 1.387913374304946e-05,
      "loss": 0.0,
      "step": 4937
    },
    {
      "epoch": 1.4451273046532045,
      "grad_norm": 0.00015900484868325293,
      "learning_rate": 1.3871817383669885e-05,
      "loss": 0.0,
      "step": 4938
    },
    {
      "epoch": 1.4454199590283876,
      "grad_norm": 0.0038649272173643112,
      "learning_rate": 1.3864501024290313e-05,
      "loss": 0.0,
      "step": 4939
    },
    {
      "epoch": 1.4457126134035705,
      "grad_norm": 0.0005638069706037641,
      "learning_rate": 1.3857184664910741e-05,
      "loss": 0.0,
      "step": 4940
    },
    {
      "epoch": 1.4460052677787534,
      "grad_norm": 0.00047475972678512335,
      "learning_rate": 1.3849868305531169e-05,
      "loss": 0.0,
      "step": 4941
    },
    {
      "epoch": 1.4462979221539363,
      "grad_norm": 0.00030017804238013923,
      "learning_rate": 1.3842551946151597e-05,
      "loss": 0.0,
      "step": 4942
    },
    {
      "epoch": 1.4465905765291192,
      "grad_norm": 0.000523193331900984,
      "learning_rate": 1.3835235586772022e-05,
      "loss": 0.0,
      "step": 4943
    },
    {
      "epoch": 1.446883230904302,
      "grad_norm": 0.0013426643563434482,
      "learning_rate": 1.382791922739245e-05,
      "loss": 0.0,
      "step": 4944
    },
    {
      "epoch": 1.447175885279485,
      "grad_norm": 0.001148558221757412,
      "learning_rate": 1.3820602868012878e-05,
      "loss": 0.0,
      "step": 4945
    },
    {
      "epoch": 1.4474685396546678,
      "grad_norm": 0.0019947707187384367,
      "learning_rate": 1.3813286508633305e-05,
      "loss": 0.0,
      "step": 4946
    },
    {
      "epoch": 1.4477611940298507,
      "grad_norm": 0.0031954795122146606,
      "learning_rate": 1.3805970149253733e-05,
      "loss": 0.0,
      "step": 4947
    },
    {
      "epoch": 1.4480538484050336,
      "grad_norm": 0.002721356926485896,
      "learning_rate": 1.3798653789874158e-05,
      "loss": 0.0,
      "step": 4948
    },
    {
      "epoch": 1.4483465027802165,
      "grad_norm": 0.0005469535244628787,
      "learning_rate": 1.3791337430494586e-05,
      "loss": 0.0,
      "step": 4949
    },
    {
      "epoch": 1.4486391571553994,
      "grad_norm": 0.0012882015435025096,
      "learning_rate": 1.3784021071115014e-05,
      "loss": 0.0,
      "step": 4950
    },
    {
      "epoch": 1.4489318115305823,
      "grad_norm": 0.0010478420881554484,
      "learning_rate": 1.3776704711735442e-05,
      "loss": 0.0,
      "step": 4951
    },
    {
      "epoch": 1.4492244659057654,
      "grad_norm": 0.002568156225606799,
      "learning_rate": 1.376938835235587e-05,
      "loss": 0.0,
      "step": 4952
    },
    {
      "epoch": 1.4495171202809483,
      "grad_norm": 15.102858543395996,
      "learning_rate": 1.3762071992976298e-05,
      "loss": 0.1145,
      "step": 4953
    },
    {
      "epoch": 1.4498097746561311,
      "grad_norm": 0.006282886490225792,
      "learning_rate": 1.3754755633596722e-05,
      "loss": 0.0001,
      "step": 4954
    },
    {
      "epoch": 1.450102429031314,
      "grad_norm": 0.0005572250229306519,
      "learning_rate": 1.374743927421715e-05,
      "loss": 0.0,
      "step": 4955
    },
    {
      "epoch": 1.450395083406497,
      "grad_norm": 0.000361787126166746,
      "learning_rate": 1.3740122914837578e-05,
      "loss": 0.0,
      "step": 4956
    },
    {
      "epoch": 1.4506877377816798,
      "grad_norm": 0.00026979981339536607,
      "learning_rate": 1.3732806555458006e-05,
      "loss": 0.0,
      "step": 4957
    },
    {
      "epoch": 1.4509803921568627,
      "grad_norm": 0.0007593963528051972,
      "learning_rate": 1.3725490196078432e-05,
      "loss": 0.0,
      "step": 4958
    },
    {
      "epoch": 1.4512730465320456,
      "grad_norm": 0.0003041128220502287,
      "learning_rate": 1.3718173836698859e-05,
      "loss": 0.0,
      "step": 4959
    },
    {
      "epoch": 1.4515657009072287,
      "grad_norm": 0.00046027437201701105,
      "learning_rate": 1.3710857477319287e-05,
      "loss": 0.0,
      "step": 4960
    },
    {
      "epoch": 1.4518583552824116,
      "grad_norm": 0.0025288264732807875,
      "learning_rate": 1.3703541117939715e-05,
      "loss": 0.0,
      "step": 4961
    },
    {
      "epoch": 1.4521510096575945,
      "grad_norm": 0.00045756640611216426,
      "learning_rate": 1.369622475856014e-05,
      "loss": 0.0,
      "step": 4962
    },
    {
      "epoch": 1.4524436640327774,
      "grad_norm": 0.0004389963869471103,
      "learning_rate": 1.3688908399180569e-05,
      "loss": 0.0,
      "step": 4963
    },
    {
      "epoch": 1.4527363184079602,
      "grad_norm": 0.0001569542509969324,
      "learning_rate": 1.3681592039800995e-05,
      "loss": 0.0,
      "step": 4964
    },
    {
      "epoch": 1.4530289727831431,
      "grad_norm": 0.002118258038535714,
      "learning_rate": 1.3674275680421423e-05,
      "loss": 0.0,
      "step": 4965
    },
    {
      "epoch": 1.453321627158326,
      "grad_norm": 0.0007403275230899453,
      "learning_rate": 1.366695932104185e-05,
      "loss": 0.0,
      "step": 4966
    },
    {
      "epoch": 1.453614281533509,
      "grad_norm": 0.0005932514322921634,
      "learning_rate": 1.3659642961662277e-05,
      "loss": 0.0,
      "step": 4967
    },
    {
      "epoch": 1.4539069359086918,
      "grad_norm": 0.000845657370518893,
      "learning_rate": 1.3652326602282705e-05,
      "loss": 0.0,
      "step": 4968
    },
    {
      "epoch": 1.4541995902838747,
      "grad_norm": 0.004051250871270895,
      "learning_rate": 1.3645010242903131e-05,
      "loss": 0.0,
      "step": 4969
    },
    {
      "epoch": 1.4544922446590576,
      "grad_norm": 0.0012087548384442925,
      "learning_rate": 1.3637693883523558e-05,
      "loss": 0.0,
      "step": 4970
    },
    {
      "epoch": 1.4547848990342405,
      "grad_norm": 0.2617930471897125,
      "learning_rate": 1.3630377524143986e-05,
      "loss": 0.0006,
      "step": 4971
    },
    {
      "epoch": 1.4550775534094234,
      "grad_norm": 0.0018058578716591,
      "learning_rate": 1.3623061164764414e-05,
      "loss": 0.0,
      "step": 4972
    },
    {
      "epoch": 1.4553702077846065,
      "grad_norm": 0.00019489476107992232,
      "learning_rate": 1.3615744805384842e-05,
      "loss": 0.0,
      "step": 4973
    },
    {
      "epoch": 1.4556628621597894,
      "grad_norm": 0.00017870304873213172,
      "learning_rate": 1.360842844600527e-05,
      "loss": 0.0,
      "step": 4974
    },
    {
      "epoch": 1.4559555165349722,
      "grad_norm": 0.009403640404343605,
      "learning_rate": 1.3601112086625694e-05,
      "loss": 0.0001,
      "step": 4975
    },
    {
      "epoch": 1.4562481709101551,
      "grad_norm": 0.0010502643417567015,
      "learning_rate": 1.3593795727246122e-05,
      "loss": 0.0,
      "step": 4976
    },
    {
      "epoch": 1.456540825285338,
      "grad_norm": 0.0006351264892145991,
      "learning_rate": 1.358647936786655e-05,
      "loss": 0.0,
      "step": 4977
    },
    {
      "epoch": 1.456833479660521,
      "grad_norm": 0.00040663781692273915,
      "learning_rate": 1.3579163008486978e-05,
      "loss": 0.0,
      "step": 4978
    },
    {
      "epoch": 1.4571261340357038,
      "grad_norm": 0.00038158282404765487,
      "learning_rate": 1.3571846649107406e-05,
      "loss": 0.0,
      "step": 4979
    },
    {
      "epoch": 1.4574187884108867,
      "grad_norm": 0.00010390720126451924,
      "learning_rate": 1.356453028972783e-05,
      "loss": 0.0,
      "step": 4980
    },
    {
      "epoch": 1.4577114427860698,
      "grad_norm": 0.000412240216974169,
      "learning_rate": 1.3557213930348258e-05,
      "loss": 0.0,
      "step": 4981
    },
    {
      "epoch": 1.4580040971612527,
      "grad_norm": 0.000625427404884249,
      "learning_rate": 1.3549897570968686e-05,
      "loss": 0.0,
      "step": 4982
    },
    {
      "epoch": 1.4582967515364356,
      "grad_norm": 0.0014911566395312548,
      "learning_rate": 1.3542581211589114e-05,
      "loss": 0.0,
      "step": 4983
    },
    {
      "epoch": 1.4585894059116185,
      "grad_norm": 0.0005219570593908429,
      "learning_rate": 1.3535264852209542e-05,
      "loss": 0.0,
      "step": 4984
    },
    {
      "epoch": 1.4588820602868013,
      "grad_norm": 0.005115315783768892,
      "learning_rate": 1.3527948492829967e-05,
      "loss": 0.0001,
      "step": 4985
    },
    {
      "epoch": 1.4591747146619842,
      "grad_norm": 0.0006503094919025898,
      "learning_rate": 1.3520632133450395e-05,
      "loss": 0.0,
      "step": 4986
    },
    {
      "epoch": 1.4594673690371671,
      "grad_norm": 0.0003797007957473397,
      "learning_rate": 1.3513315774070823e-05,
      "loss": 0.0,
      "step": 4987
    },
    {
      "epoch": 1.45976002341235,
      "grad_norm": 0.0003924908523913473,
      "learning_rate": 1.350599941469125e-05,
      "loss": 0.0,
      "step": 4988
    },
    {
      "epoch": 1.460052677787533,
      "grad_norm": 0.00021261059737298638,
      "learning_rate": 1.3498683055311679e-05,
      "loss": 0.0,
      "step": 4989
    },
    {
      "epoch": 1.4603453321627158,
      "grad_norm": 0.0020935856737196445,
      "learning_rate": 1.3491366695932107e-05,
      "loss": 0.0,
      "step": 4990
    },
    {
      "epoch": 1.4606379865378987,
      "grad_norm": 0.0005655603599734604,
      "learning_rate": 1.3484050336552531e-05,
      "loss": 0.0,
      "step": 4991
    },
    {
      "epoch": 1.4609306409130816,
      "grad_norm": 0.0003155624435748905,
      "learning_rate": 1.3476733977172959e-05,
      "loss": 0.0,
      "step": 4992
    },
    {
      "epoch": 1.4612232952882644,
      "grad_norm": 0.0008528819889761508,
      "learning_rate": 1.3469417617793387e-05,
      "loss": 0.0,
      "step": 4993
    },
    {
      "epoch": 1.4615159496634473,
      "grad_norm": 0.0002827745920512825,
      "learning_rate": 1.3462101258413815e-05,
      "loss": 0.0,
      "step": 4994
    },
    {
      "epoch": 1.4618086040386304,
      "grad_norm": 0.0006649006390944123,
      "learning_rate": 1.3454784899034243e-05,
      "loss": 0.0,
      "step": 4995
    },
    {
      "epoch": 1.4621012584138133,
      "grad_norm": 0.0003897503484040499,
      "learning_rate": 1.3447468539654667e-05,
      "loss": 0.0,
      "step": 4996
    },
    {
      "epoch": 1.4623939127889962,
      "grad_norm": 0.0005316457245498896,
      "learning_rate": 1.3440152180275095e-05,
      "loss": 0.0,
      "step": 4997
    },
    {
      "epoch": 1.462686567164179,
      "grad_norm": 0.00031724441214464605,
      "learning_rate": 1.3432835820895523e-05,
      "loss": 0.0,
      "step": 4998
    },
    {
      "epoch": 1.462979221539362,
      "grad_norm": 0.0014708181843161583,
      "learning_rate": 1.3425519461515951e-05,
      "loss": 0.0,
      "step": 4999
    },
    {
      "epoch": 1.4632718759145449,
      "grad_norm": 0.0015603512292727828,
      "learning_rate": 1.341820310213638e-05,
      "loss": 0.0,
      "step": 5000
    },
    {
      "epoch": 1.4635645302897278,
      "grad_norm": 0.0002041055413428694,
      "learning_rate": 1.3410886742756804e-05,
      "loss": 0.0,
      "step": 5001
    },
    {
      "epoch": 1.4638571846649107,
      "grad_norm": 0.010169253684580326,
      "learning_rate": 1.3403570383377232e-05,
      "loss": 0.0001,
      "step": 5002
    },
    {
      "epoch": 1.4641498390400938,
      "grad_norm": 0.0003600012860260904,
      "learning_rate": 1.339625402399766e-05,
      "loss": 0.0,
      "step": 5003
    },
    {
      "epoch": 1.4644424934152767,
      "grad_norm": 0.00545900035649538,
      "learning_rate": 1.3388937664618088e-05,
      "loss": 0.0001,
      "step": 5004
    },
    {
      "epoch": 1.4647351477904595,
      "grad_norm": 0.00021998106967657804,
      "learning_rate": 1.3381621305238516e-05,
      "loss": 0.0,
      "step": 5005
    },
    {
      "epoch": 1.4650278021656424,
      "grad_norm": 0.0014308503596112132,
      "learning_rate": 1.3374304945858942e-05,
      "loss": 0.0,
      "step": 5006
    },
    {
      "epoch": 1.4653204565408253,
      "grad_norm": 0.0008946347516030073,
      "learning_rate": 1.3366988586479368e-05,
      "loss": 0.0,
      "step": 5007
    },
    {
      "epoch": 1.4656131109160082,
      "grad_norm": 0.0009333545458503067,
      "learning_rate": 1.3359672227099796e-05,
      "loss": 0.0,
      "step": 5008
    },
    {
      "epoch": 1.465905765291191,
      "grad_norm": 0.0006259658839553595,
      "learning_rate": 1.3352355867720224e-05,
      "loss": 0.0,
      "step": 5009
    },
    {
      "epoch": 1.466198419666374,
      "grad_norm": 0.00019977972260676324,
      "learning_rate": 1.334503950834065e-05,
      "loss": 0.0,
      "step": 5010
    },
    {
      "epoch": 1.4664910740415569,
      "grad_norm": 0.0003582122444640845,
      "learning_rate": 1.3337723148961078e-05,
      "loss": 0.0,
      "step": 5011
    },
    {
      "epoch": 1.4667837284167398,
      "grad_norm": 0.0016777211567386985,
      "learning_rate": 1.3330406789581504e-05,
      "loss": 0.0,
      "step": 5012
    },
    {
      "epoch": 1.4670763827919227,
      "grad_norm": 0.0008100473205558956,
      "learning_rate": 1.3323090430201932e-05,
      "loss": 0.0,
      "step": 5013
    },
    {
      "epoch": 1.4673690371671055,
      "grad_norm": 0.00014631861995439976,
      "learning_rate": 1.3315774070822359e-05,
      "loss": 0.0,
      "step": 5014
    },
    {
      "epoch": 1.4676616915422884,
      "grad_norm": 0.0002567546034697443,
      "learning_rate": 1.3308457711442787e-05,
      "loss": 0.0,
      "step": 5015
    },
    {
      "epoch": 1.4679543459174715,
      "grad_norm": 9.500472515355796e-05,
      "learning_rate": 1.3301141352063215e-05,
      "loss": 0.0,
      "step": 5016
    },
    {
      "epoch": 1.4682470002926544,
      "grad_norm": 0.0003111510304734111,
      "learning_rate": 1.329382499268364e-05,
      "loss": 0.0,
      "step": 5017
    },
    {
      "epoch": 1.4685396546678373,
      "grad_norm": 0.001199898892082274,
      "learning_rate": 1.3286508633304067e-05,
      "loss": 0.0,
      "step": 5018
    },
    {
      "epoch": 1.4688323090430202,
      "grad_norm": 0.00024551950627937913,
      "learning_rate": 1.3279192273924495e-05,
      "loss": 0.0,
      "step": 5019
    },
    {
      "epoch": 1.469124963418203,
      "grad_norm": 0.00043802321306429803,
      "learning_rate": 1.3271875914544923e-05,
      "loss": 0.0,
      "step": 5020
    },
    {
      "epoch": 1.469417617793386,
      "grad_norm": 0.0006880344590172172,
      "learning_rate": 1.3264559555165351e-05,
      "loss": 0.0,
      "step": 5021
    },
    {
      "epoch": 1.4697102721685689,
      "grad_norm": 0.00022182802786119282,
      "learning_rate": 1.3257243195785779e-05,
      "loss": 0.0,
      "step": 5022
    },
    {
      "epoch": 1.4700029265437518,
      "grad_norm": 0.000889520684722811,
      "learning_rate": 1.3249926836406203e-05,
      "loss": 0.0,
      "step": 5023
    },
    {
      "epoch": 1.4702955809189349,
      "grad_norm": 0.0015817388193681836,
      "learning_rate": 1.3242610477026631e-05,
      "loss": 0.0,
      "step": 5024
    },
    {
      "epoch": 1.4705882352941178,
      "grad_norm": 0.0004126756975892931,
      "learning_rate": 1.323529411764706e-05,
      "loss": 0.0,
      "step": 5025
    },
    {
      "epoch": 1.4708808896693006,
      "grad_norm": 0.0004018440959043801,
      "learning_rate": 1.3227977758267487e-05,
      "loss": 0.0,
      "step": 5026
    },
    {
      "epoch": 1.4711735440444835,
      "grad_norm": 3.947730779647827,
      "learning_rate": 1.3220661398887915e-05,
      "loss": 0.0052,
      "step": 5027
    },
    {
      "epoch": 1.4714661984196664,
      "grad_norm": 0.00024372065672650933,
      "learning_rate": 1.321334503950834e-05,
      "loss": 0.0,
      "step": 5028
    },
    {
      "epoch": 1.4717588527948493,
      "grad_norm": 0.00025667098816484213,
      "learning_rate": 1.3206028680128768e-05,
      "loss": 0.0,
      "step": 5029
    },
    {
      "epoch": 1.4720515071700322,
      "grad_norm": 0.0001714712561806664,
      "learning_rate": 1.3198712320749196e-05,
      "loss": 0.0,
      "step": 5030
    },
    {
      "epoch": 1.472344161545215,
      "grad_norm": 0.0007231322815641761,
      "learning_rate": 1.3191395961369624e-05,
      "loss": 0.0,
      "step": 5031
    },
    {
      "epoch": 1.472636815920398,
      "grad_norm": 3.6601428985595703,
      "learning_rate": 1.3184079601990052e-05,
      "loss": 0.1682,
      "step": 5032
    },
    {
      "epoch": 1.4729294702955809,
      "grad_norm": 0.0038921311497688293,
      "learning_rate": 1.3176763242610476e-05,
      "loss": 0.0,
      "step": 5033
    },
    {
      "epoch": 1.4732221246707637,
      "grad_norm": 0.00017758534522727132,
      "learning_rate": 1.3169446883230904e-05,
      "loss": 0.0,
      "step": 5034
    },
    {
      "epoch": 1.4735147790459466,
      "grad_norm": 0.001061857445165515,
      "learning_rate": 1.3162130523851332e-05,
      "loss": 0.0,
      "step": 5035
    },
    {
      "epoch": 1.4738074334211295,
      "grad_norm": 11.057812690734863,
      "learning_rate": 1.315481416447176e-05,
      "loss": 0.1485,
      "step": 5036
    },
    {
      "epoch": 1.4741000877963126,
      "grad_norm": 0.0003560293698683381,
      "learning_rate": 1.3147497805092188e-05,
      "loss": 0.0,
      "step": 5037
    },
    {
      "epoch": 1.4743927421714955,
      "grad_norm": 0.0003879110736306757,
      "learning_rate": 1.3140181445712613e-05,
      "loss": 0.0,
      "step": 5038
    },
    {
      "epoch": 1.4746853965466784,
      "grad_norm": 0.0008185947081074119,
      "learning_rate": 1.313286508633304e-05,
      "loss": 0.0,
      "step": 5039
    },
    {
      "epoch": 1.4749780509218613,
      "grad_norm": 0.0019864842761307955,
      "learning_rate": 1.3125548726953468e-05,
      "loss": 0.0,
      "step": 5040
    },
    {
      "epoch": 1.4752707052970442,
      "grad_norm": 0.0014067594893276691,
      "learning_rate": 1.3118232367573896e-05,
      "loss": 0.0,
      "step": 5041
    },
    {
      "epoch": 1.475563359672227,
      "grad_norm": 0.0010090309660881758,
      "learning_rate": 1.3110916008194324e-05,
      "loss": 0.0,
      "step": 5042
    },
    {
      "epoch": 1.47585601404741,
      "grad_norm": 0.00034172378946095705,
      "learning_rate": 1.3103599648814752e-05,
      "loss": 0.0,
      "step": 5043
    },
    {
      "epoch": 1.4761486684225928,
      "grad_norm": 0.001443683635443449,
      "learning_rate": 1.3096283289435177e-05,
      "loss": 0.0,
      "step": 5044
    },
    {
      "epoch": 1.476441322797776,
      "grad_norm": 0.0004092727031093091,
      "learning_rate": 1.3088966930055605e-05,
      "loss": 0.0,
      "step": 5045
    },
    {
      "epoch": 1.4767339771729588,
      "grad_norm": 0.013661233708262444,
      "learning_rate": 1.3081650570676033e-05,
      "loss": 0.0001,
      "step": 5046
    },
    {
      "epoch": 1.4770266315481417,
      "grad_norm": 0.0003617628535721451,
      "learning_rate": 1.307433421129646e-05,
      "loss": 0.0,
      "step": 5047
    },
    {
      "epoch": 1.4773192859233246,
      "grad_norm": 0.000713932910002768,
      "learning_rate": 1.3067017851916889e-05,
      "loss": 0.0,
      "step": 5048
    },
    {
      "epoch": 1.4776119402985075,
      "grad_norm": 0.0007373035768978298,
      "learning_rate": 1.3059701492537313e-05,
      "loss": 0.0,
      "step": 5049
    },
    {
      "epoch": 1.4779045946736904,
      "grad_norm": 0.0002988100459333509,
      "learning_rate": 1.3052385133157741e-05,
      "loss": 0.0,
      "step": 5050
    },
    {
      "epoch": 1.4781972490488733,
      "grad_norm": 0.0005745989619754255,
      "learning_rate": 1.3045068773778169e-05,
      "loss": 0.0,
      "step": 5051
    },
    {
      "epoch": 1.4784899034240562,
      "grad_norm": 0.0006586909294128418,
      "learning_rate": 1.3037752414398597e-05,
      "loss": 0.0,
      "step": 5052
    },
    {
      "epoch": 1.478782557799239,
      "grad_norm": 0.01472164411097765,
      "learning_rate": 1.3030436055019023e-05,
      "loss": 0.0002,
      "step": 5053
    },
    {
      "epoch": 1.479075212174422,
      "grad_norm": 0.02453051321208477,
      "learning_rate": 1.302311969563945e-05,
      "loss": 0.0002,
      "step": 5054
    },
    {
      "epoch": 1.4793678665496048,
      "grad_norm": 0.0015729361912235618,
      "learning_rate": 1.3015803336259878e-05,
      "loss": 0.0,
      "step": 5055
    },
    {
      "epoch": 1.4796605209247877,
      "grad_norm": 0.00018458921113051474,
      "learning_rate": 1.3008486976880305e-05,
      "loss": 0.0,
      "step": 5056
    },
    {
      "epoch": 1.4799531752999706,
      "grad_norm": 0.001480237115174532,
      "learning_rate": 1.3001170617500732e-05,
      "loss": 0.0,
      "step": 5057
    },
    {
      "epoch": 1.4802458296751537,
      "grad_norm": 0.000548430485650897,
      "learning_rate": 1.299385425812116e-05,
      "loss": 0.0,
      "step": 5058
    },
    {
      "epoch": 1.4805384840503366,
      "grad_norm": 0.0007473621517419815,
      "learning_rate": 1.2986537898741588e-05,
      "loss": 0.0,
      "step": 5059
    },
    {
      "epoch": 1.4808311384255195,
      "grad_norm": 0.0016481553902849555,
      "learning_rate": 1.2979221539362014e-05,
      "loss": 0.0,
      "step": 5060
    },
    {
      "epoch": 1.4811237928007024,
      "grad_norm": 0.0009578980389051139,
      "learning_rate": 1.297190517998244e-05,
      "loss": 0.0,
      "step": 5061
    },
    {
      "epoch": 1.4814164471758853,
      "grad_norm": 0.0005476105725392699,
      "learning_rate": 1.2964588820602868e-05,
      "loss": 0.0,
      "step": 5062
    },
    {
      "epoch": 1.4817091015510682,
      "grad_norm": 0.012019267305731773,
      "learning_rate": 1.2957272461223296e-05,
      "loss": 0.0001,
      "step": 5063
    },
    {
      "epoch": 1.482001755926251,
      "grad_norm": 0.0003678315260913223,
      "learning_rate": 1.2949956101843724e-05,
      "loss": 0.0,
      "step": 5064
    },
    {
      "epoch": 1.482294410301434,
      "grad_norm": 0.00038286292692646384,
      "learning_rate": 1.2942639742464149e-05,
      "loss": 0.0,
      "step": 5065
    },
    {
      "epoch": 1.482587064676617,
      "grad_norm": 0.004511554725468159,
      "learning_rate": 1.2935323383084577e-05,
      "loss": 0.0001,
      "step": 5066
    },
    {
      "epoch": 1.4828797190518,
      "grad_norm": 0.0002436785725876689,
      "learning_rate": 1.2928007023705004e-05,
      "loss": 0.0,
      "step": 5067
    },
    {
      "epoch": 1.4831723734269828,
      "grad_norm": 0.00030211356352083385,
      "learning_rate": 1.2920690664325432e-05,
      "loss": 0.0,
      "step": 5068
    },
    {
      "epoch": 1.4834650278021657,
      "grad_norm": 0.0012050060322508216,
      "learning_rate": 1.291337430494586e-05,
      "loss": 0.0,
      "step": 5069
    },
    {
      "epoch": 1.4837576821773486,
      "grad_norm": 0.0007688667974434793,
      "learning_rate": 1.2906057945566285e-05,
      "loss": 0.0,
      "step": 5070
    },
    {
      "epoch": 1.4840503365525315,
      "grad_norm": 0.00024990117526613176,
      "learning_rate": 1.2898741586186713e-05,
      "loss": 0.0,
      "step": 5071
    },
    {
      "epoch": 1.4843429909277144,
      "grad_norm": 0.00033757698838599026,
      "learning_rate": 1.289142522680714e-05,
      "loss": 0.0,
      "step": 5072
    },
    {
      "epoch": 1.4846356453028973,
      "grad_norm": 0.0011487255105748773,
      "learning_rate": 1.2884108867427569e-05,
      "loss": 0.0,
      "step": 5073
    },
    {
      "epoch": 1.4849282996780802,
      "grad_norm": 0.0008633338147774339,
      "learning_rate": 1.2876792508047997e-05,
      "loss": 0.0,
      "step": 5074
    },
    {
      "epoch": 1.485220954053263,
      "grad_norm": 0.0006356406374834478,
      "learning_rate": 1.2869476148668425e-05,
      "loss": 0.0,
      "step": 5075
    },
    {
      "epoch": 1.485513608428446,
      "grad_norm": 0.0013006513472646475,
      "learning_rate": 1.286215978928885e-05,
      "loss": 0.0,
      "step": 5076
    },
    {
      "epoch": 1.4858062628036288,
      "grad_norm": 0.32902756333351135,
      "learning_rate": 1.2854843429909277e-05,
      "loss": 0.0013,
      "step": 5077
    },
    {
      "epoch": 1.4860989171788117,
      "grad_norm": 0.0004978632205165923,
      "learning_rate": 1.2847527070529705e-05,
      "loss": 0.0,
      "step": 5078
    },
    {
      "epoch": 1.4863915715539946,
      "grad_norm": 0.00033494128729216754,
      "learning_rate": 1.2840210711150133e-05,
      "loss": 0.0,
      "step": 5079
    },
    {
      "epoch": 1.4866842259291777,
      "grad_norm": 0.00036529218778014183,
      "learning_rate": 1.2832894351770561e-05,
      "loss": 0.0,
      "step": 5080
    },
    {
      "epoch": 1.4869768803043606,
      "grad_norm": 0.0002138021809514612,
      "learning_rate": 1.2825577992390986e-05,
      "loss": 0.0,
      "step": 5081
    },
    {
      "epoch": 1.4872695346795435,
      "grad_norm": 0.0006728937150910497,
      "learning_rate": 1.2818261633011414e-05,
      "loss": 0.0,
      "step": 5082
    },
    {
      "epoch": 1.4875621890547264,
      "grad_norm": 0.00042946042958647013,
      "learning_rate": 1.2810945273631842e-05,
      "loss": 0.0,
      "step": 5083
    },
    {
      "epoch": 1.4878548434299093,
      "grad_norm": 0.0011009355075657368,
      "learning_rate": 1.280362891425227e-05,
      "loss": 0.0,
      "step": 5084
    },
    {
      "epoch": 1.4881474978050921,
      "grad_norm": 0.00038497481727972627,
      "learning_rate": 1.2796312554872697e-05,
      "loss": 0.0,
      "step": 5085
    },
    {
      "epoch": 1.488440152180275,
      "grad_norm": 0.00034325546585023403,
      "learning_rate": 1.2788996195493122e-05,
      "loss": 0.0,
      "step": 5086
    },
    {
      "epoch": 1.488732806555458,
      "grad_norm": 0.0006645070970989764,
      "learning_rate": 1.278167983611355e-05,
      "loss": 0.0,
      "step": 5087
    },
    {
      "epoch": 1.489025460930641,
      "grad_norm": 0.007781506050378084,
      "learning_rate": 1.2774363476733978e-05,
      "loss": 0.0001,
      "step": 5088
    },
    {
      "epoch": 1.489318115305824,
      "grad_norm": 0.0004191446350887418,
      "learning_rate": 1.2767047117354406e-05,
      "loss": 0.0,
      "step": 5089
    },
    {
      "epoch": 1.4896107696810068,
      "grad_norm": 0.0004313608515076339,
      "learning_rate": 1.2759730757974834e-05,
      "loss": 0.0,
      "step": 5090
    },
    {
      "epoch": 1.4899034240561897,
      "grad_norm": 0.8468864560127258,
      "learning_rate": 1.2752414398595262e-05,
      "loss": 0.0048,
      "step": 5091
    },
    {
      "epoch": 1.4901960784313726,
      "grad_norm": 0.009637207724153996,
      "learning_rate": 1.2745098039215686e-05,
      "loss": 0.0001,
      "step": 5092
    },
    {
      "epoch": 1.4904887328065555,
      "grad_norm": 0.001110792625695467,
      "learning_rate": 1.2737781679836114e-05,
      "loss": 0.0,
      "step": 5093
    },
    {
      "epoch": 1.4907813871817384,
      "grad_norm": 0.041818760335445404,
      "learning_rate": 1.2730465320456542e-05,
      "loss": 0.0002,
      "step": 5094
    },
    {
      "epoch": 1.4910740415569212,
      "grad_norm": 0.0006818793481215835,
      "learning_rate": 1.272314896107697e-05,
      "loss": 0.0,
      "step": 5095
    },
    {
      "epoch": 1.4913666959321041,
      "grad_norm": 0.03327636420726776,
      "learning_rate": 1.2715832601697396e-05,
      "loss": 0.0001,
      "step": 5096
    },
    {
      "epoch": 1.491659350307287,
      "grad_norm": 0.0002938769175671041,
      "learning_rate": 1.2708516242317823e-05,
      "loss": 0.0,
      "step": 5097
    },
    {
      "epoch": 1.49195200468247,
      "grad_norm": 0.0005069047911092639,
      "learning_rate": 1.270119988293825e-05,
      "loss": 0.0,
      "step": 5098
    },
    {
      "epoch": 1.4922446590576528,
      "grad_norm": 0.000463327975012362,
      "learning_rate": 1.2693883523558679e-05,
      "loss": 0.0,
      "step": 5099
    },
    {
      "epoch": 1.4925373134328357,
      "grad_norm": 0.0002462085976731032,
      "learning_rate": 1.2686567164179105e-05,
      "loss": 0.0,
      "step": 5100
    },
    {
      "epoch": 1.4928299678080188,
      "grad_norm": 0.0003037136630155146,
      "learning_rate": 1.2679250804799533e-05,
      "loss": 0.0,
      "step": 5101
    },
    {
      "epoch": 1.4931226221832017,
      "grad_norm": 0.001337929628789425,
      "learning_rate": 1.2671934445419959e-05,
      "loss": 0.0,
      "step": 5102
    },
    {
      "epoch": 1.4934152765583846,
      "grad_norm": 0.0003057431895285845,
      "learning_rate": 1.2664618086040387e-05,
      "loss": 0.0,
      "step": 5103
    },
    {
      "epoch": 1.4937079309335675,
      "grad_norm": 0.005425651557743549,
      "learning_rate": 1.2657301726660813e-05,
      "loss": 0.0001,
      "step": 5104
    },
    {
      "epoch": 1.4940005853087504,
      "grad_norm": 0.039733052253723145,
      "learning_rate": 1.2649985367281241e-05,
      "loss": 0.0003,
      "step": 5105
    },
    {
      "epoch": 1.4942932396839332,
      "grad_norm": 0.00035593679058365524,
      "learning_rate": 1.2642669007901669e-05,
      "loss": 0.0,
      "step": 5106
    },
    {
      "epoch": 1.4945858940591161,
      "grad_norm": 0.00027309643337503076,
      "learning_rate": 1.2635352648522095e-05,
      "loss": 0.0,
      "step": 5107
    },
    {
      "epoch": 1.494878548434299,
      "grad_norm": 0.0004261991707608104,
      "learning_rate": 1.2628036289142522e-05,
      "loss": 0.0,
      "step": 5108
    },
    {
      "epoch": 1.4951712028094821,
      "grad_norm": 0.00017297992599196732,
      "learning_rate": 1.262071992976295e-05,
      "loss": 0.0,
      "step": 5109
    },
    {
      "epoch": 1.495463857184665,
      "grad_norm": 0.00018700190412346274,
      "learning_rate": 1.2613403570383378e-05,
      "loss": 0.0,
      "step": 5110
    },
    {
      "epoch": 1.495756511559848,
      "grad_norm": 0.00016983953537419438,
      "learning_rate": 1.2606087211003805e-05,
      "loss": 0.0,
      "step": 5111
    },
    {
      "epoch": 1.4960491659350308,
      "grad_norm": 0.00023661194427404553,
      "learning_rate": 1.2598770851624233e-05,
      "loss": 0.0,
      "step": 5112
    },
    {
      "epoch": 1.4963418203102137,
      "grad_norm": 0.0859820768237114,
      "learning_rate": 1.2591454492244658e-05,
      "loss": 0.0003,
      "step": 5113
    },
    {
      "epoch": 1.4966344746853966,
      "grad_norm": 0.0002156875270884484,
      "learning_rate": 1.2584138132865086e-05,
      "loss": 0.0,
      "step": 5114
    },
    {
      "epoch": 1.4969271290605795,
      "grad_norm": 0.0038348534144461155,
      "learning_rate": 1.2576821773485514e-05,
      "loss": 0.0,
      "step": 5115
    },
    {
      "epoch": 1.4972197834357623,
      "grad_norm": 0.0002400731318630278,
      "learning_rate": 1.2569505414105942e-05,
      "loss": 0.0,
      "step": 5116
    },
    {
      "epoch": 1.4975124378109452,
      "grad_norm": 0.005611362401396036,
      "learning_rate": 1.256218905472637e-05,
      "loss": 0.0001,
      "step": 5117
    },
    {
      "epoch": 1.4978050921861281,
      "grad_norm": 0.00036208340316079557,
      "learning_rate": 1.2554872695346794e-05,
      "loss": 0.0,
      "step": 5118
    },
    {
      "epoch": 1.498097746561311,
      "grad_norm": 0.00026920289383269846,
      "learning_rate": 1.2547556335967222e-05,
      "loss": 0.0,
      "step": 5119
    },
    {
      "epoch": 1.498390400936494,
      "grad_norm": 0.0069849989376962185,
      "learning_rate": 1.254023997658765e-05,
      "loss": 0.0001,
      "step": 5120
    },
    {
      "epoch": 1.4986830553116768,
      "grad_norm": 0.006348796654492617,
      "learning_rate": 1.2532923617208078e-05,
      "loss": 0.0001,
      "step": 5121
    },
    {
      "epoch": 1.49897570968686,
      "grad_norm": 0.0009372648200951517,
      "learning_rate": 1.2525607257828506e-05,
      "loss": 0.0,
      "step": 5122
    },
    {
      "epoch": 1.4992683640620428,
      "grad_norm": 0.00017370175919495523,
      "learning_rate": 1.251829089844893e-05,
      "loss": 0.0,
      "step": 5123
    },
    {
      "epoch": 1.4995610184372257,
      "grad_norm": 0.0004743621975649148,
      "learning_rate": 1.2510974539069359e-05,
      "loss": 0.0,
      "step": 5124
    },
    {
      "epoch": 1.4998536728124086,
      "grad_norm": 0.00016216834774240851,
      "learning_rate": 1.2503658179689787e-05,
      "loss": 0.0,
      "step": 5125
    },
    {
      "epoch": 1.5001463271875914,
      "grad_norm": 0.00036036837263964117,
      "learning_rate": 1.2496341820310215e-05,
      "loss": 0.0,
      "step": 5126
    },
    {
      "epoch": 1.5004389815627743,
      "grad_norm": 0.0001896850735647604,
      "learning_rate": 1.248902546093064e-05,
      "loss": 0.0,
      "step": 5127
    },
    {
      "epoch": 1.5007316359379572,
      "grad_norm": 0.0015876274555921555,
      "learning_rate": 1.2481709101551069e-05,
      "loss": 0.0,
      "step": 5128
    },
    {
      "epoch": 1.5010242903131403,
      "grad_norm": 0.0003233393654227257,
      "learning_rate": 1.2474392742171497e-05,
      "loss": 0.0,
      "step": 5129
    },
    {
      "epoch": 1.5013169446883232,
      "grad_norm": 0.00015400140546262264,
      "learning_rate": 1.2467076382791923e-05,
      "loss": 0.0,
      "step": 5130
    },
    {
      "epoch": 1.501609599063506,
      "grad_norm": 0.011688701808452606,
      "learning_rate": 1.2459760023412351e-05,
      "loss": 0.0001,
      "step": 5131
    },
    {
      "epoch": 1.501902253438689,
      "grad_norm": 0.0010759357828646898,
      "learning_rate": 1.2452443664032779e-05,
      "loss": 0.0,
      "step": 5132
    },
    {
      "epoch": 1.5021949078138719,
      "grad_norm": 0.0020588075276464224,
      "learning_rate": 1.2445127304653205e-05,
      "loss": 0.0,
      "step": 5133
    },
    {
      "epoch": 1.5024875621890548,
      "grad_norm": 0.00021067510533612221,
      "learning_rate": 1.2437810945273633e-05,
      "loss": 0.0,
      "step": 5134
    },
    {
      "epoch": 1.5027802165642377,
      "grad_norm": 0.0002671003749128431,
      "learning_rate": 1.243049458589406e-05,
      "loss": 0.0,
      "step": 5135
    },
    {
      "epoch": 1.5030728709394205,
      "grad_norm": 0.0004259385459590703,
      "learning_rate": 1.2423178226514487e-05,
      "loss": 0.0,
      "step": 5136
    },
    {
      "epoch": 1.5033655253146034,
      "grad_norm": 0.00041439401684328914,
      "learning_rate": 1.2415861867134915e-05,
      "loss": 0.0,
      "step": 5137
    },
    {
      "epoch": 1.5036581796897863,
      "grad_norm": 0.00020446009875740856,
      "learning_rate": 1.2408545507755342e-05,
      "loss": 0.0,
      "step": 5138
    },
    {
      "epoch": 1.5039508340649692,
      "grad_norm": 0.00014487757289316505,
      "learning_rate": 1.240122914837577e-05,
      "loss": 0.0,
      "step": 5139
    },
    {
      "epoch": 1.504243488440152,
      "grad_norm": 0.0002552311634644866,
      "learning_rate": 1.2393912788996197e-05,
      "loss": 0.0,
      "step": 5140
    },
    {
      "epoch": 1.504536142815335,
      "grad_norm": 0.0020015928894281387,
      "learning_rate": 1.2386596429616624e-05,
      "loss": 0.0,
      "step": 5141
    },
    {
      "epoch": 1.5048287971905179,
      "grad_norm": 0.000605784181971103,
      "learning_rate": 1.2379280070237052e-05,
      "loss": 0.0,
      "step": 5142
    },
    {
      "epoch": 1.5051214515657008,
      "grad_norm": 0.0010445835068821907,
      "learning_rate": 1.2371963710857478e-05,
      "loss": 0.0,
      "step": 5143
    },
    {
      "epoch": 1.5054141059408837,
      "grad_norm": 0.00021796950022689998,
      "learning_rate": 1.2364647351477906e-05,
      "loss": 0.0,
      "step": 5144
    },
    {
      "epoch": 1.5057067603160668,
      "grad_norm": 0.0011622559977695346,
      "learning_rate": 1.2357330992098334e-05,
      "loss": 0.0,
      "step": 5145
    },
    {
      "epoch": 1.5059994146912496,
      "grad_norm": 0.00017326697707176208,
      "learning_rate": 1.235001463271876e-05,
      "loss": 0.0,
      "step": 5146
    },
    {
      "epoch": 1.5062920690664325,
      "grad_norm": 0.0017791668651625514,
      "learning_rate": 1.2342698273339188e-05,
      "loss": 0.0,
      "step": 5147
    },
    {
      "epoch": 1.5065847234416154,
      "grad_norm": 0.0007580950041301548,
      "learning_rate": 1.2335381913959614e-05,
      "loss": 0.0,
      "step": 5148
    },
    {
      "epoch": 1.5068773778167983,
      "grad_norm": 0.00043160910718142986,
      "learning_rate": 1.2328065554580042e-05,
      "loss": 0.0,
      "step": 5149
    },
    {
      "epoch": 1.5071700321919814,
      "grad_norm": 0.0001991214812733233,
      "learning_rate": 1.2320749195200468e-05,
      "loss": 0.0,
      "step": 5150
    },
    {
      "epoch": 1.5074626865671643,
      "grad_norm": 0.0004408336244523525,
      "learning_rate": 1.2313432835820896e-05,
      "loss": 0.0,
      "step": 5151
    },
    {
      "epoch": 1.5077553409423472,
      "grad_norm": 0.0007374389097094536,
      "learning_rate": 1.2306116476441323e-05,
      "loss": 0.0,
      "step": 5152
    },
    {
      "epoch": 1.50804799531753,
      "grad_norm": 0.00024802814004942775,
      "learning_rate": 1.229880011706175e-05,
      "loss": 0.0,
      "step": 5153
    },
    {
      "epoch": 1.508340649692713,
      "grad_norm": 0.00018486542103346437,
      "learning_rate": 1.2291483757682177e-05,
      "loss": 0.0,
      "step": 5154
    },
    {
      "epoch": 1.5086333040678959,
      "grad_norm": 0.0001799042511265725,
      "learning_rate": 1.2284167398302605e-05,
      "loss": 0.0,
      "step": 5155
    },
    {
      "epoch": 1.5089259584430788,
      "grad_norm": 0.00019250593322794884,
      "learning_rate": 1.2276851038923033e-05,
      "loss": 0.0,
      "step": 5156
    },
    {
      "epoch": 1.5092186128182616,
      "grad_norm": 0.00026157504180446267,
      "learning_rate": 1.2269534679543459e-05,
      "loss": 0.0,
      "step": 5157
    },
    {
      "epoch": 1.5095112671934445,
      "grad_norm": 0.00043231903691776097,
      "learning_rate": 1.2262218320163887e-05,
      "loss": 0.0,
      "step": 5158
    },
    {
      "epoch": 1.5098039215686274,
      "grad_norm": 0.0003085647476837039,
      "learning_rate": 1.2254901960784313e-05,
      "loss": 0.0,
      "step": 5159
    },
    {
      "epoch": 1.5100965759438103,
      "grad_norm": 0.00012914616672787815,
      "learning_rate": 1.2247585601404741e-05,
      "loss": 0.0,
      "step": 5160
    },
    {
      "epoch": 1.5103892303189932,
      "grad_norm": 0.0002531928475946188,
      "learning_rate": 1.2240269242025169e-05,
      "loss": 0.0,
      "step": 5161
    },
    {
      "epoch": 1.510681884694176,
      "grad_norm": 0.00022002340119797736,
      "learning_rate": 1.2232952882645595e-05,
      "loss": 0.0,
      "step": 5162
    },
    {
      "epoch": 1.510974539069359,
      "grad_norm": 0.00036131092929281294,
      "learning_rate": 1.2225636523266023e-05,
      "loss": 0.0,
      "step": 5163
    },
    {
      "epoch": 1.5112671934445419,
      "grad_norm": 0.0001618622336536646,
      "learning_rate": 1.221832016388645e-05,
      "loss": 0.0,
      "step": 5164
    },
    {
      "epoch": 1.5115598478197247,
      "grad_norm": 0.000462544645415619,
      "learning_rate": 1.2211003804506878e-05,
      "loss": 0.0,
      "step": 5165
    },
    {
      "epoch": 1.5118525021949079,
      "grad_norm": 0.13465426862239838,
      "learning_rate": 1.2203687445127305e-05,
      "loss": 0.0006,
      "step": 5166
    },
    {
      "epoch": 1.5121451565700907,
      "grad_norm": 0.0018989065429195762,
      "learning_rate": 1.2196371085747732e-05,
      "loss": 0.0,
      "step": 5167
    },
    {
      "epoch": 1.5124378109452736,
      "grad_norm": 0.00041230389615520835,
      "learning_rate": 1.218905472636816e-05,
      "loss": 0.0,
      "step": 5168
    },
    {
      "epoch": 1.5127304653204565,
      "grad_norm": 5.563209924730472e-05,
      "learning_rate": 1.2181738366988588e-05,
      "loss": 0.0,
      "step": 5169
    },
    {
      "epoch": 1.5130231196956394,
      "grad_norm": 0.0004752097011078149,
      "learning_rate": 1.2174422007609014e-05,
      "loss": 0.0,
      "step": 5170
    },
    {
      "epoch": 1.5133157740708225,
      "grad_norm": 0.014747914858162403,
      "learning_rate": 1.2167105648229442e-05,
      "loss": 0.0001,
      "step": 5171
    },
    {
      "epoch": 1.5136084284460054,
      "grad_norm": 0.00024430773919448256,
      "learning_rate": 1.2159789288849868e-05,
      "loss": 0.0,
      "step": 5172
    },
    {
      "epoch": 1.5139010828211883,
      "grad_norm": 0.00029366539092734456,
      "learning_rate": 1.2152472929470296e-05,
      "loss": 0.0,
      "step": 5173
    },
    {
      "epoch": 1.5141937371963712,
      "grad_norm": 0.6517511606216431,
      "learning_rate": 1.2145156570090724e-05,
      "loss": 0.0012,
      "step": 5174
    },
    {
      "epoch": 1.514486391571554,
      "grad_norm": 0.00038376718293875456,
      "learning_rate": 1.213784021071115e-05,
      "loss": 0.0,
      "step": 5175
    },
    {
      "epoch": 1.514779045946737,
      "grad_norm": 0.00014857873611617833,
      "learning_rate": 1.2130523851331578e-05,
      "loss": 0.0,
      "step": 5176
    },
    {
      "epoch": 1.5150717003219198,
      "grad_norm": 0.0004696003161370754,
      "learning_rate": 1.2123207491952006e-05,
      "loss": 0.0,
      "step": 5177
    },
    {
      "epoch": 1.5153643546971027,
      "grad_norm": 0.0009346023434773088,
      "learning_rate": 1.2115891132572432e-05,
      "loss": 0.0,
      "step": 5178
    },
    {
      "epoch": 1.5156570090722856,
      "grad_norm": 0.008241580799221992,
      "learning_rate": 1.210857477319286e-05,
      "loss": 0.0001,
      "step": 5179
    },
    {
      "epoch": 1.5159496634474685,
      "grad_norm": 0.00023253982362803072,
      "learning_rate": 1.2101258413813287e-05,
      "loss": 0.0,
      "step": 5180
    },
    {
      "epoch": 1.5162423178226514,
      "grad_norm": 0.00026602420257404447,
      "learning_rate": 1.2093942054433715e-05,
      "loss": 0.0,
      "step": 5181
    },
    {
      "epoch": 1.5165349721978343,
      "grad_norm": 0.0006170397973619401,
      "learning_rate": 1.2086625695054143e-05,
      "loss": 0.0,
      "step": 5182
    },
    {
      "epoch": 1.5168276265730172,
      "grad_norm": 0.0007347619393840432,
      "learning_rate": 1.2079309335674569e-05,
      "loss": 0.0,
      "step": 5183
    },
    {
      "epoch": 1.5171202809482,
      "grad_norm": 0.00021642430510837585,
      "learning_rate": 1.2071992976294997e-05,
      "loss": 0.0,
      "step": 5184
    },
    {
      "epoch": 1.517412935323383,
      "grad_norm": 0.00039059240953065455,
      "learning_rate": 1.2064676616915425e-05,
      "loss": 0.0,
      "step": 5185
    },
    {
      "epoch": 1.5177055896985658,
      "grad_norm": 0.00016414794663432986,
      "learning_rate": 1.2057360257535851e-05,
      "loss": 0.0,
      "step": 5186
    },
    {
      "epoch": 1.517998244073749,
      "grad_norm": 0.0010327588533982635,
      "learning_rate": 1.2050043898156279e-05,
      "loss": 0.0,
      "step": 5187
    },
    {
      "epoch": 1.5182908984489318,
      "grad_norm": 0.0007639078539796174,
      "learning_rate": 1.2042727538776705e-05,
      "loss": 0.0,
      "step": 5188
    },
    {
      "epoch": 1.5185835528241147,
      "grad_norm": 0.0015009710332378745,
      "learning_rate": 1.2035411179397133e-05,
      "loss": 0.0,
      "step": 5189
    },
    {
      "epoch": 1.5188762071992976,
      "grad_norm": 0.0005584223545156419,
      "learning_rate": 1.2028094820017561e-05,
      "loss": 0.0,
      "step": 5190
    },
    {
      "epoch": 1.5191688615744805,
      "grad_norm": 0.0007484898087568581,
      "learning_rate": 1.2020778460637987e-05,
      "loss": 0.0,
      "step": 5191
    },
    {
      "epoch": 1.5194615159496636,
      "grad_norm": 0.000180575909325853,
      "learning_rate": 1.2013462101258415e-05,
      "loss": 0.0,
      "step": 5192
    },
    {
      "epoch": 1.5197541703248465,
      "grad_norm": 0.00027021754067391157,
      "learning_rate": 1.2006145741878842e-05,
      "loss": 0.0,
      "step": 5193
    },
    {
      "epoch": 1.5200468247000294,
      "grad_norm": 0.000288851821096614,
      "learning_rate": 1.199882938249927e-05,
      "loss": 0.0,
      "step": 5194
    },
    {
      "epoch": 1.5203394790752123,
      "grad_norm": 0.0009474614635109901,
      "learning_rate": 1.1991513023119696e-05,
      "loss": 0.0,
      "step": 5195
    },
    {
      "epoch": 1.5206321334503952,
      "grad_norm": 0.0007197274244390428,
      "learning_rate": 1.1984196663740124e-05,
      "loss": 0.0,
      "step": 5196
    },
    {
      "epoch": 1.520924787825578,
      "grad_norm": 0.0004081302904523909,
      "learning_rate": 1.197688030436055e-05,
      "loss": 0.0,
      "step": 5197
    },
    {
      "epoch": 1.521217442200761,
      "grad_norm": 0.0008590701618231833,
      "learning_rate": 1.1969563944980978e-05,
      "loss": 0.0,
      "step": 5198
    },
    {
      "epoch": 1.5215100965759438,
      "grad_norm": 11.854995727539062,
      "learning_rate": 1.1962247585601404e-05,
      "loss": 0.1868,
      "step": 5199
    },
    {
      "epoch": 1.5218027509511267,
      "grad_norm": 0.0003525600768625736,
      "learning_rate": 1.1954931226221832e-05,
      "loss": 0.0,
      "step": 5200
    },
    {
      "epoch": 1.5220954053263096,
      "grad_norm": 0.0009801547275856137,
      "learning_rate": 1.194761486684226e-05,
      "loss": 0.0,
      "step": 5201
    },
    {
      "epoch": 1.5223880597014925,
      "grad_norm": 0.00014240208838600665,
      "learning_rate": 1.1940298507462686e-05,
      "loss": 0.0,
      "step": 5202
    },
    {
      "epoch": 1.5226807140766754,
      "grad_norm": 0.0006725612911395729,
      "learning_rate": 1.1932982148083114e-05,
      "loss": 0.0,
      "step": 5203
    },
    {
      "epoch": 1.5229733684518583,
      "grad_norm": 0.00021944883337710053,
      "learning_rate": 1.192566578870354e-05,
      "loss": 0.0,
      "step": 5204
    },
    {
      "epoch": 1.5232660228270412,
      "grad_norm": 0.00021907799236942083,
      "learning_rate": 1.1918349429323968e-05,
      "loss": 0.0,
      "step": 5205
    },
    {
      "epoch": 1.523558677202224,
      "grad_norm": 0.001510326866991818,
      "learning_rate": 1.1911033069944396e-05,
      "loss": 0.0,
      "step": 5206
    },
    {
      "epoch": 1.523851331577407,
      "grad_norm": 6.531454710057005e-05,
      "learning_rate": 1.1903716710564823e-05,
      "loss": 0.0,
      "step": 5207
    },
    {
      "epoch": 1.52414398595259,
      "grad_norm": 0.000475716165965423,
      "learning_rate": 1.189640035118525e-05,
      "loss": 0.0,
      "step": 5208
    },
    {
      "epoch": 1.524436640327773,
      "grad_norm": 0.0007988332072272897,
      "learning_rate": 1.1889083991805679e-05,
      "loss": 0.0,
      "step": 5209
    },
    {
      "epoch": 1.5247292947029558,
      "grad_norm": 0.0004497581103350967,
      "learning_rate": 1.1881767632426105e-05,
      "loss": 0.0,
      "step": 5210
    },
    {
      "epoch": 1.5250219490781387,
      "grad_norm": 0.00024964759359136224,
      "learning_rate": 1.1874451273046533e-05,
      "loss": 0.0,
      "step": 5211
    },
    {
      "epoch": 1.5253146034533216,
      "grad_norm": 0.0013601906830444932,
      "learning_rate": 1.1867134913666959e-05,
      "loss": 0.0,
      "step": 5212
    },
    {
      "epoch": 1.5256072578285045,
      "grad_norm": 0.0006484125624410808,
      "learning_rate": 1.1859818554287387e-05,
      "loss": 0.0,
      "step": 5213
    },
    {
      "epoch": 1.5258999122036876,
      "grad_norm": 0.0006421684520319104,
      "learning_rate": 1.1852502194907815e-05,
      "loss": 0.0,
      "step": 5214
    },
    {
      "epoch": 1.5261925665788705,
      "grad_norm": 0.00012406571477185935,
      "learning_rate": 1.1845185835528241e-05,
      "loss": 0.0,
      "step": 5215
    },
    {
      "epoch": 1.5264852209540534,
      "grad_norm": 0.00018810993060469627,
      "learning_rate": 1.1837869476148669e-05,
      "loss": 0.0,
      "step": 5216
    },
    {
      "epoch": 1.5267778753292363,
      "grad_norm": 0.00022501441708300263,
      "learning_rate": 1.1830553116769097e-05,
      "loss": 0.0,
      "step": 5217
    },
    {
      "epoch": 1.5270705297044191,
      "grad_norm": 0.00022160427761264145,
      "learning_rate": 1.1823236757389523e-05,
      "loss": 0.0,
      "step": 5218
    },
    {
      "epoch": 1.527363184079602,
      "grad_norm": 0.0007525791297666728,
      "learning_rate": 1.1815920398009951e-05,
      "loss": 0.0,
      "step": 5219
    },
    {
      "epoch": 1.527655838454785,
      "grad_norm": 0.001195007935166359,
      "learning_rate": 1.1808604038630378e-05,
      "loss": 0.0,
      "step": 5220
    },
    {
      "epoch": 1.5279484928299678,
      "grad_norm": 0.00014784677478019148,
      "learning_rate": 1.1801287679250805e-05,
      "loss": 0.0,
      "step": 5221
    },
    {
      "epoch": 1.5282411472051507,
      "grad_norm": 0.0003311718173790723,
      "learning_rate": 1.1793971319871233e-05,
      "loss": 0.0,
      "step": 5222
    },
    {
      "epoch": 1.5285338015803336,
      "grad_norm": 0.012646566145122051,
      "learning_rate": 1.178665496049166e-05,
      "loss": 0.0001,
      "step": 5223
    },
    {
      "epoch": 1.5288264559555165,
      "grad_norm": 0.0003175755846314132,
      "learning_rate": 1.1779338601112088e-05,
      "loss": 0.0,
      "step": 5224
    },
    {
      "epoch": 1.5291191103306994,
      "grad_norm": 0.0007361461757682264,
      "learning_rate": 1.1772022241732514e-05,
      "loss": 0.0,
      "step": 5225
    },
    {
      "epoch": 1.5294117647058822,
      "grad_norm": 0.002891580108553171,
      "learning_rate": 1.1764705882352942e-05,
      "loss": 0.0,
      "step": 5226
    },
    {
      "epoch": 1.5297044190810651,
      "grad_norm": 0.01717713475227356,
      "learning_rate": 1.175738952297337e-05,
      "loss": 0.0001,
      "step": 5227
    },
    {
      "epoch": 1.529997073456248,
      "grad_norm": 0.0002141928707715124,
      "learning_rate": 1.1750073163593796e-05,
      "loss": 0.0,
      "step": 5228
    },
    {
      "epoch": 1.5302897278314311,
      "grad_norm": 0.00033819713280536234,
      "learning_rate": 1.1742756804214224e-05,
      "loss": 0.0,
      "step": 5229
    },
    {
      "epoch": 1.530582382206614,
      "grad_norm": 0.00022239873942453414,
      "learning_rate": 1.1735440444834652e-05,
      "loss": 0.0,
      "step": 5230
    },
    {
      "epoch": 1.530875036581797,
      "grad_norm": 0.00019714840163942426,
      "learning_rate": 1.1728124085455078e-05,
      "loss": 0.0,
      "step": 5231
    },
    {
      "epoch": 1.5311676909569798,
      "grad_norm": 0.0002605865884106606,
      "learning_rate": 1.1720807726075506e-05,
      "loss": 0.0,
      "step": 5232
    },
    {
      "epoch": 1.5314603453321627,
      "grad_norm": 0.00030124778277240694,
      "learning_rate": 1.1713491366695932e-05,
      "loss": 0.0,
      "step": 5233
    },
    {
      "epoch": 1.5317529997073456,
      "grad_norm": 0.0002326490357518196,
      "learning_rate": 1.170617500731636e-05,
      "loss": 0.0,
      "step": 5234
    },
    {
      "epoch": 1.5320456540825287,
      "grad_norm": 0.0002671336114872247,
      "learning_rate": 1.1698858647936788e-05,
      "loss": 0.0,
      "step": 5235
    },
    {
      "epoch": 1.5323383084577116,
      "grad_norm": 7.774143887218088e-05,
      "learning_rate": 1.1691542288557215e-05,
      "loss": 0.0,
      "step": 5236
    },
    {
      "epoch": 1.5326309628328945,
      "grad_norm": 0.000155685018398799,
      "learning_rate": 1.1684225929177643e-05,
      "loss": 0.0,
      "step": 5237
    },
    {
      "epoch": 1.5329236172080773,
      "grad_norm": 0.0005528471665456891,
      "learning_rate": 1.1676909569798069e-05,
      "loss": 0.0,
      "step": 5238
    },
    {
      "epoch": 1.5332162715832602,
      "grad_norm": 0.0002706719096750021,
      "learning_rate": 1.1669593210418497e-05,
      "loss": 0.0,
      "step": 5239
    },
    {
      "epoch": 1.5335089259584431,
      "grad_norm": 0.0028580750804394484,
      "learning_rate": 1.1662276851038923e-05,
      "loss": 0.0,
      "step": 5240
    },
    {
      "epoch": 1.533801580333626,
      "grad_norm": 0.0005850521847605705,
      "learning_rate": 1.1654960491659351e-05,
      "loss": 0.0,
      "step": 5241
    },
    {
      "epoch": 1.534094234708809,
      "grad_norm": 0.00022836266725789756,
      "learning_rate": 1.1647644132279777e-05,
      "loss": 0.0,
      "step": 5242
    },
    {
      "epoch": 1.5343868890839918,
      "grad_norm": 0.0002294753649039194,
      "learning_rate": 1.1640327772900205e-05,
      "loss": 0.0,
      "step": 5243
    },
    {
      "epoch": 1.5346795434591747,
      "grad_norm": 0.0001081616137525998,
      "learning_rate": 1.1633011413520631e-05,
      "loss": 0.0,
      "step": 5244
    },
    {
      "epoch": 1.5349721978343576,
      "grad_norm": 0.00021684737293981016,
      "learning_rate": 1.162569505414106e-05,
      "loss": 0.0,
      "step": 5245
    },
    {
      "epoch": 1.5352648522095405,
      "grad_norm": 0.15254947543144226,
      "learning_rate": 1.1618378694761487e-05,
      "loss": 0.0005,
      "step": 5246
    },
    {
      "epoch": 1.5355575065847233,
      "grad_norm": 0.0001880263298517093,
      "learning_rate": 1.1611062335381914e-05,
      "loss": 0.0,
      "step": 5247
    },
    {
      "epoch": 1.5358501609599062,
      "grad_norm": 0.00023818780027795583,
      "learning_rate": 1.1603745976002341e-05,
      "loss": 0.0,
      "step": 5248
    },
    {
      "epoch": 1.5361428153350891,
      "grad_norm": 0.0008488367311656475,
      "learning_rate": 1.1596429616622768e-05,
      "loss": 0.0,
      "step": 5249
    },
    {
      "epoch": 1.536435469710272,
      "grad_norm": 0.00025859667221084237,
      "learning_rate": 1.1589113257243196e-05,
      "loss": 0.0,
      "step": 5250
    },
    {
      "epoch": 1.5367281240854551,
      "grad_norm": 0.0006804241565987468,
      "learning_rate": 1.1581796897863624e-05,
      "loss": 0.0,
      "step": 5251
    },
    {
      "epoch": 1.537020778460638,
      "grad_norm": 0.0003907028876710683,
      "learning_rate": 1.157448053848405e-05,
      "loss": 0.0,
      "step": 5252
    },
    {
      "epoch": 1.537313432835821,
      "grad_norm": 0.0006528544472530484,
      "learning_rate": 1.1567164179104478e-05,
      "loss": 0.0,
      "step": 5253
    },
    {
      "epoch": 1.5376060872110038,
      "grad_norm": 0.0007995760533958673,
      "learning_rate": 1.1559847819724906e-05,
      "loss": 0.0,
      "step": 5254
    },
    {
      "epoch": 1.5378987415861867,
      "grad_norm": 0.00013504792877938598,
      "learning_rate": 1.1552531460345332e-05,
      "loss": 0.0,
      "step": 5255
    },
    {
      "epoch": 1.5381913959613698,
      "grad_norm": 0.0011998468544334173,
      "learning_rate": 1.154521510096576e-05,
      "loss": 0.0,
      "step": 5256
    },
    {
      "epoch": 1.5384840503365527,
      "grad_norm": 0.00048505334416404366,
      "learning_rate": 1.1537898741586186e-05,
      "loss": 0.0,
      "step": 5257
    },
    {
      "epoch": 1.5387767047117356,
      "grad_norm": 0.0005926922312937677,
      "learning_rate": 1.1530582382206614e-05,
      "loss": 0.0,
      "step": 5258
    },
    {
      "epoch": 1.5390693590869184,
      "grad_norm": 0.0023627367336302996,
      "learning_rate": 1.1523266022827042e-05,
      "loss": 0.0,
      "step": 5259
    },
    {
      "epoch": 1.5393620134621013,
      "grad_norm": 0.00027779172523878515,
      "learning_rate": 1.1515949663447468e-05,
      "loss": 0.0,
      "step": 5260
    },
    {
      "epoch": 1.5396546678372842,
      "grad_norm": 0.0007822969928383827,
      "learning_rate": 1.1508633304067896e-05,
      "loss": 0.0,
      "step": 5261
    },
    {
      "epoch": 1.539947322212467,
      "grad_norm": 7.334064483642578,
      "learning_rate": 1.1501316944688324e-05,
      "loss": 0.1488,
      "step": 5262
    },
    {
      "epoch": 1.54023997658765,
      "grad_norm": 0.0007610148168168962,
      "learning_rate": 1.149400058530875e-05,
      "loss": 0.0,
      "step": 5263
    },
    {
      "epoch": 1.5405326309628329,
      "grad_norm": 0.00011353415175108239,
      "learning_rate": 1.1486684225929179e-05,
      "loss": 0.0,
      "step": 5264
    },
    {
      "epoch": 1.5408252853380158,
      "grad_norm": 0.0006136444280855358,
      "learning_rate": 1.1479367866549605e-05,
      "loss": 0.0,
      "step": 5265
    },
    {
      "epoch": 1.5411179397131987,
      "grad_norm": 0.00028312645736150444,
      "learning_rate": 1.1472051507170033e-05,
      "loss": 0.0,
      "step": 5266
    },
    {
      "epoch": 1.5414105940883815,
      "grad_norm": 0.0005532561335712671,
      "learning_rate": 1.146473514779046e-05,
      "loss": 0.0,
      "step": 5267
    },
    {
      "epoch": 1.5417032484635644,
      "grad_norm": 0.0004004383517894894,
      "learning_rate": 1.1457418788410887e-05,
      "loss": 0.0,
      "step": 5268
    },
    {
      "epoch": 1.5419959028387473,
      "grad_norm": 0.001884422730654478,
      "learning_rate": 1.1450102429031315e-05,
      "loss": 0.0,
      "step": 5269
    },
    {
      "epoch": 1.5422885572139302,
      "grad_norm": 0.0054294029250741005,
      "learning_rate": 1.1442786069651743e-05,
      "loss": 0.0001,
      "step": 5270
    },
    {
      "epoch": 1.542581211589113,
      "grad_norm": 0.0014466885477304459,
      "learning_rate": 1.1435469710272169e-05,
      "loss": 0.0,
      "step": 5271
    },
    {
      "epoch": 1.5428738659642962,
      "grad_norm": 0.000630267895758152,
      "learning_rate": 1.1428153350892597e-05,
      "loss": 0.0,
      "step": 5272
    },
    {
      "epoch": 1.543166520339479,
      "grad_norm": 0.004547810181975365,
      "learning_rate": 1.1420836991513023e-05,
      "loss": 0.0,
      "step": 5273
    },
    {
      "epoch": 1.543459174714662,
      "grad_norm": 0.00033015169901773334,
      "learning_rate": 1.1413520632133451e-05,
      "loss": 0.0,
      "step": 5274
    },
    {
      "epoch": 1.5437518290898449,
      "grad_norm": 0.0033606109209358692,
      "learning_rate": 1.140620427275388e-05,
      "loss": 0.0,
      "step": 5275
    },
    {
      "epoch": 1.5440444834650278,
      "grad_norm": 0.0004756157286465168,
      "learning_rate": 1.1398887913374305e-05,
      "loss": 0.0,
      "step": 5276
    },
    {
      "epoch": 1.5443371378402109,
      "grad_norm": 0.0032487944699823856,
      "learning_rate": 1.1391571553994733e-05,
      "loss": 0.0,
      "step": 5277
    },
    {
      "epoch": 1.5446297922153938,
      "grad_norm": 0.00032525573624297976,
      "learning_rate": 1.1384255194615161e-05,
      "loss": 0.0,
      "step": 5278
    },
    {
      "epoch": 1.5449224465905766,
      "grad_norm": 0.002686867257580161,
      "learning_rate": 1.1376938835235588e-05,
      "loss": 0.0,
      "step": 5279
    },
    {
      "epoch": 1.5452151009657595,
      "grad_norm": 0.0040970370173454285,
      "learning_rate": 1.1369622475856016e-05,
      "loss": 0.0001,
      "step": 5280
    },
    {
      "epoch": 1.5455077553409424,
      "grad_norm": 0.0005278911557979882,
      "learning_rate": 1.1362306116476442e-05,
      "loss": 0.0,
      "step": 5281
    },
    {
      "epoch": 1.5458004097161253,
      "grad_norm": 0.0020878189243376255,
      "learning_rate": 1.135498975709687e-05,
      "loss": 0.0,
      "step": 5282
    },
    {
      "epoch": 1.5460930640913082,
      "grad_norm": 0.0007402309565804899,
      "learning_rate": 1.1347673397717298e-05,
      "loss": 0.0,
      "step": 5283
    },
    {
      "epoch": 1.546385718466491,
      "grad_norm": 0.1317036747932434,
      "learning_rate": 1.1340357038337724e-05,
      "loss": 0.0005,
      "step": 5284
    },
    {
      "epoch": 1.546678372841674,
      "grad_norm": 0.001256045768968761,
      "learning_rate": 1.1333040678958152e-05,
      "loss": 0.0,
      "step": 5285
    },
    {
      "epoch": 1.5469710272168569,
      "grad_norm": 0.0002911437186412513,
      "learning_rate": 1.1325724319578578e-05,
      "loss": 0.0,
      "step": 5286
    },
    {
      "epoch": 1.5472636815920398,
      "grad_norm": 0.000973232788965106,
      "learning_rate": 1.1318407960199006e-05,
      "loss": 0.0,
      "step": 5287
    },
    {
      "epoch": 1.5475563359672226,
      "grad_norm": 0.0005813579773530364,
      "learning_rate": 1.1311091600819432e-05,
      "loss": 0.0,
      "step": 5288
    },
    {
      "epoch": 1.5478489903424055,
      "grad_norm": 0.0004719037970062345,
      "learning_rate": 1.130377524143986e-05,
      "loss": 0.0,
      "step": 5289
    },
    {
      "epoch": 1.5481416447175884,
      "grad_norm": 0.0004831487312912941,
      "learning_rate": 1.1296458882060287e-05,
      "loss": 0.0,
      "step": 5290
    },
    {
      "epoch": 1.5484342990927713,
      "grad_norm": 0.00042136141564697027,
      "learning_rate": 1.1289142522680715e-05,
      "loss": 0.0,
      "step": 5291
    },
    {
      "epoch": 1.5487269534679542,
      "grad_norm": 0.001426449278369546,
      "learning_rate": 1.128182616330114e-05,
      "loss": 0.0,
      "step": 5292
    },
    {
      "epoch": 1.5490196078431373,
      "grad_norm": 0.0014369417913258076,
      "learning_rate": 1.1274509803921569e-05,
      "loss": 0.0,
      "step": 5293
    },
    {
      "epoch": 1.5493122622183202,
      "grad_norm": 0.0026862751692533493,
      "learning_rate": 1.1267193444541995e-05,
      "loss": 0.0,
      "step": 5294
    },
    {
      "epoch": 1.549604916593503,
      "grad_norm": 0.001535497372969985,
      "learning_rate": 1.1259877085162423e-05,
      "loss": 0.0,
      "step": 5295
    },
    {
      "epoch": 1.549897570968686,
      "grad_norm": 0.06716033816337585,
      "learning_rate": 1.1252560725782851e-05,
      "loss": 0.0004,
      "step": 5296
    },
    {
      "epoch": 1.5501902253438689,
      "grad_norm": 0.0018338051158934832,
      "learning_rate": 1.1245244366403277e-05,
      "loss": 0.0,
      "step": 5297
    },
    {
      "epoch": 1.550482879719052,
      "grad_norm": 0.009203138761222363,
      "learning_rate": 1.1237928007023705e-05,
      "loss": 0.0001,
      "step": 5298
    },
    {
      "epoch": 1.5507755340942349,
      "grad_norm": 0.0012893445091322064,
      "learning_rate": 1.1230611647644133e-05,
      "loss": 0.0,
      "step": 5299
    },
    {
      "epoch": 1.5510681884694177,
      "grad_norm": 0.00018410121265333146,
      "learning_rate": 1.122329528826456e-05,
      "loss": 0.0,
      "step": 5300
    },
    {
      "epoch": 1.5513608428446006,
      "grad_norm": 22.063335418701172,
      "learning_rate": 1.1215978928884987e-05,
      "loss": 0.1073,
      "step": 5301
    },
    {
      "epoch": 1.5516534972197835,
      "grad_norm": 0.00047670266940258443,
      "learning_rate": 1.1208662569505414e-05,
      "loss": 0.0,
      "step": 5302
    },
    {
      "epoch": 1.5519461515949664,
      "grad_norm": 3.2325854301452637,
      "learning_rate": 1.1201346210125841e-05,
      "loss": 0.2646,
      "step": 5303
    },
    {
      "epoch": 1.5522388059701493,
      "grad_norm": 0.002112477319315076,
      "learning_rate": 1.119402985074627e-05,
      "loss": 0.0,
      "step": 5304
    },
    {
      "epoch": 1.5525314603453322,
      "grad_norm": 0.0007211135234683752,
      "learning_rate": 1.1186713491366696e-05,
      "loss": 0.0,
      "step": 5305
    },
    {
      "epoch": 1.552824114720515,
      "grad_norm": 0.000910220667719841,
      "learning_rate": 1.1179397131987124e-05,
      "loss": 0.0,
      "step": 5306
    },
    {
      "epoch": 1.553116769095698,
      "grad_norm": 0.08611762523651123,
      "learning_rate": 1.1172080772607552e-05,
      "loss": 0.0004,
      "step": 5307
    },
    {
      "epoch": 1.5534094234708808,
      "grad_norm": 0.2647046148777008,
      "learning_rate": 1.1164764413227978e-05,
      "loss": 0.0009,
      "step": 5308
    },
    {
      "epoch": 1.5537020778460637,
      "grad_norm": 16.1777400970459,
      "learning_rate": 1.1157448053848406e-05,
      "loss": 0.0437,
      "step": 5309
    },
    {
      "epoch": 1.5539947322212466,
      "grad_norm": 0.000800949230324477,
      "learning_rate": 1.1150131694468832e-05,
      "loss": 0.0,
      "step": 5310
    },
    {
      "epoch": 1.5542873865964295,
      "grad_norm": 0.0012882291339337826,
      "learning_rate": 1.114281533508926e-05,
      "loss": 0.0,
      "step": 5311
    },
    {
      "epoch": 1.5545800409716124,
      "grad_norm": 0.00023692568356636912,
      "learning_rate": 1.1135498975709688e-05,
      "loss": 0.0,
      "step": 5312
    },
    {
      "epoch": 1.5548726953467953,
      "grad_norm": 0.0003705483686644584,
      "learning_rate": 1.1128182616330114e-05,
      "loss": 0.0,
      "step": 5313
    },
    {
      "epoch": 1.5551653497219784,
      "grad_norm": 0.1628727912902832,
      "learning_rate": 1.1120866256950542e-05,
      "loss": 0.0008,
      "step": 5314
    },
    {
      "epoch": 1.5554580040971613,
      "grad_norm": 0.0014205491170287132,
      "learning_rate": 1.111354989757097e-05,
      "loss": 0.0,
      "step": 5315
    },
    {
      "epoch": 1.5557506584723442,
      "grad_norm": 0.0007606131257489324,
      "learning_rate": 1.1106233538191396e-05,
      "loss": 0.0,
      "step": 5316
    },
    {
      "epoch": 1.556043312847527,
      "grad_norm": 0.41100841760635376,
      "learning_rate": 1.1098917178811824e-05,
      "loss": 0.0016,
      "step": 5317
    },
    {
      "epoch": 1.55633596722271,
      "grad_norm": 0.003020121483132243,
      "learning_rate": 1.109160081943225e-05,
      "loss": 0.0001,
      "step": 5318
    },
    {
      "epoch": 1.5566286215978928,
      "grad_norm": 0.0013891629641875625,
      "learning_rate": 1.1084284460052679e-05,
      "loss": 0.0,
      "step": 5319
    },
    {
      "epoch": 1.556921275973076,
      "grad_norm": 0.0006036332342773676,
      "learning_rate": 1.1076968100673106e-05,
      "loss": 0.0,
      "step": 5320
    },
    {
      "epoch": 1.5572139303482588,
      "grad_norm": 0.0015644223894923925,
      "learning_rate": 1.1069651741293533e-05,
      "loss": 0.0,
      "step": 5321
    },
    {
      "epoch": 1.5575065847234417,
      "grad_norm": 0.02502756007015705,
      "learning_rate": 1.106233538191396e-05,
      "loss": 0.0002,
      "step": 5322
    },
    {
      "epoch": 1.5577992390986246,
      "grad_norm": 0.0007970908773131669,
      "learning_rate": 1.1055019022534389e-05,
      "loss": 0.0,
      "step": 5323
    },
    {
      "epoch": 1.5580918934738075,
      "grad_norm": 0.000541463028639555,
      "learning_rate": 1.1047702663154815e-05,
      "loss": 0.0,
      "step": 5324
    },
    {
      "epoch": 1.5583845478489904,
      "grad_norm": 0.0011445097625255585,
      "learning_rate": 1.1040386303775243e-05,
      "loss": 0.0,
      "step": 5325
    },
    {
      "epoch": 1.5586772022241733,
      "grad_norm": 0.006099475547671318,
      "learning_rate": 1.1033069944395669e-05,
      "loss": 0.0001,
      "step": 5326
    },
    {
      "epoch": 1.5589698565993562,
      "grad_norm": 0.009510427713394165,
      "learning_rate": 1.1025753585016097e-05,
      "loss": 0.0001,
      "step": 5327
    },
    {
      "epoch": 1.559262510974539,
      "grad_norm": 0.002174089662730694,
      "learning_rate": 1.1018437225636525e-05,
      "loss": 0.0,
      "step": 5328
    },
    {
      "epoch": 1.559555165349722,
      "grad_norm": 0.0037814818788319826,
      "learning_rate": 1.1011120866256951e-05,
      "loss": 0.0001,
      "step": 5329
    },
    {
      "epoch": 1.5598478197249048,
      "grad_norm": 0.0018479940481483936,
      "learning_rate": 1.100380450687738e-05,
      "loss": 0.0,
      "step": 5330
    },
    {
      "epoch": 1.5601404741000877,
      "grad_norm": 0.0019330501090735197,
      "learning_rate": 1.0996488147497805e-05,
      "loss": 0.0,
      "step": 5331
    },
    {
      "epoch": 1.5604331284752706,
      "grad_norm": 0.029701590538024902,
      "learning_rate": 1.0989171788118233e-05,
      "loss": 0.0002,
      "step": 5332
    },
    {
      "epoch": 1.5607257828504535,
      "grad_norm": 0.0005212106043472886,
      "learning_rate": 1.098185542873866e-05,
      "loss": 0.0,
      "step": 5333
    },
    {
      "epoch": 1.5610184372256364,
      "grad_norm": 0.0018254711758345366,
      "learning_rate": 1.0974539069359088e-05,
      "loss": 0.0,
      "step": 5334
    },
    {
      "epoch": 1.5613110916008193,
      "grad_norm": 0.0021136414725333452,
      "learning_rate": 1.0967222709979514e-05,
      "loss": 0.0,
      "step": 5335
    },
    {
      "epoch": 1.5616037459760024,
      "grad_norm": 0.0023741601034998894,
      "learning_rate": 1.0959906350599942e-05,
      "loss": 0.0001,
      "step": 5336
    },
    {
      "epoch": 1.5618964003511853,
      "grad_norm": 0.011876046657562256,
      "learning_rate": 1.0952589991220368e-05,
      "loss": 0.0001,
      "step": 5337
    },
    {
      "epoch": 1.5621890547263682,
      "grad_norm": 0.0011572998482733965,
      "learning_rate": 1.0945273631840796e-05,
      "loss": 0.0,
      "step": 5338
    },
    {
      "epoch": 1.562481709101551,
      "grad_norm": 0.012459277175366879,
      "learning_rate": 1.0937957272461224e-05,
      "loss": 0.0001,
      "step": 5339
    },
    {
      "epoch": 1.562774363476734,
      "grad_norm": 0.01071714423596859,
      "learning_rate": 1.093064091308165e-05,
      "loss": 0.0001,
      "step": 5340
    },
    {
      "epoch": 1.563067017851917,
      "grad_norm": 0.0024051477666944265,
      "learning_rate": 1.0923324553702078e-05,
      "loss": 0.0,
      "step": 5341
    },
    {
      "epoch": 1.5633596722271,
      "grad_norm": 0.00044596398947760463,
      "learning_rate": 1.0916008194322504e-05,
      "loss": 0.0,
      "step": 5342
    },
    {
      "epoch": 1.5636523266022828,
      "grad_norm": 0.00155029003508389,
      "learning_rate": 1.0908691834942932e-05,
      "loss": 0.0,
      "step": 5343
    },
    {
      "epoch": 1.5639449809774657,
      "grad_norm": 0.0009954465785995126,
      "learning_rate": 1.090137547556336e-05,
      "loss": 0.0,
      "step": 5344
    },
    {
      "epoch": 1.5642376353526486,
      "grad_norm": 0.0013960065552964807,
      "learning_rate": 1.0894059116183787e-05,
      "loss": 0.0,
      "step": 5345
    },
    {
      "epoch": 1.5645302897278315,
      "grad_norm": 0.0006535080610774457,
      "learning_rate": 1.0886742756804215e-05,
      "loss": 0.0,
      "step": 5346
    },
    {
      "epoch": 1.5648229441030144,
      "grad_norm": 0.0018199041951447725,
      "learning_rate": 1.0879426397424643e-05,
      "loss": 0.0,
      "step": 5347
    },
    {
      "epoch": 1.5651155984781973,
      "grad_norm": 0.007253793999552727,
      "learning_rate": 1.0872110038045069e-05,
      "loss": 0.0001,
      "step": 5348
    },
    {
      "epoch": 1.5654082528533801,
      "grad_norm": 0.004021256230771542,
      "learning_rate": 1.0864793678665497e-05,
      "loss": 0.0,
      "step": 5349
    },
    {
      "epoch": 1.565700907228563,
      "grad_norm": 0.002721029333770275,
      "learning_rate": 1.0857477319285923e-05,
      "loss": 0.0001,
      "step": 5350
    },
    {
      "epoch": 1.565993561603746,
      "grad_norm": 0.000878484221175313,
      "learning_rate": 1.0850160959906351e-05,
      "loss": 0.0,
      "step": 5351
    },
    {
      "epoch": 1.5662862159789288,
      "grad_norm": 0.0012332138139754534,
      "learning_rate": 1.0842844600526779e-05,
      "loss": 0.0,
      "step": 5352
    },
    {
      "epoch": 1.5665788703541117,
      "grad_norm": 0.0008330278797075152,
      "learning_rate": 1.0835528241147205e-05,
      "loss": 0.0,
      "step": 5353
    },
    {
      "epoch": 1.5668715247292946,
      "grad_norm": 0.0025928113609552383,
      "learning_rate": 1.0828211881767633e-05,
      "loss": 0.0001,
      "step": 5354
    },
    {
      "epoch": 1.5671641791044775,
      "grad_norm": 0.0011688877129927278,
      "learning_rate": 1.082089552238806e-05,
      "loss": 0.0,
      "step": 5355
    },
    {
      "epoch": 1.5674568334796604,
      "grad_norm": 0.0014505028957501054,
      "learning_rate": 1.0813579163008487e-05,
      "loss": 0.0,
      "step": 5356
    },
    {
      "epoch": 1.5677494878548435,
      "grad_norm": 0.0016532718436792493,
      "learning_rate": 1.0806262803628915e-05,
      "loss": 0.0,
      "step": 5357
    },
    {
      "epoch": 1.5680421422300264,
      "grad_norm": 0.007025901693850756,
      "learning_rate": 1.0798946444249341e-05,
      "loss": 0.0001,
      "step": 5358
    },
    {
      "epoch": 1.5683347966052092,
      "grad_norm": 0.001686119707301259,
      "learning_rate": 1.079163008486977e-05,
      "loss": 0.0,
      "step": 5359
    },
    {
      "epoch": 1.5686274509803921,
      "grad_norm": 0.0013476323802024126,
      "learning_rate": 1.0784313725490197e-05,
      "loss": 0.0,
      "step": 5360
    },
    {
      "epoch": 1.568920105355575,
      "grad_norm": 0.0018852063221856952,
      "learning_rate": 1.0776997366110624e-05,
      "loss": 0.0001,
      "step": 5361
    },
    {
      "epoch": 1.5692127597307581,
      "grad_norm": 0.0031956795137375593,
      "learning_rate": 1.0769681006731052e-05,
      "loss": 0.0001,
      "step": 5362
    },
    {
      "epoch": 1.569505414105941,
      "grad_norm": 0.0007793364929966629,
      "learning_rate": 1.0762364647351478e-05,
      "loss": 0.0,
      "step": 5363
    },
    {
      "epoch": 1.569798068481124,
      "grad_norm": 0.004618383478373289,
      "learning_rate": 1.0755048287971906e-05,
      "loss": 0.0001,
      "step": 5364
    },
    {
      "epoch": 1.5700907228563068,
      "grad_norm": 0.04225985333323479,
      "learning_rate": 1.0747731928592334e-05,
      "loss": 0.0002,
      "step": 5365
    },
    {
      "epoch": 1.5703833772314897,
      "grad_norm": 0.000692862959112972,
      "learning_rate": 1.074041556921276e-05,
      "loss": 0.0,
      "step": 5366
    },
    {
      "epoch": 1.5706760316066726,
      "grad_norm": 0.0008593127131462097,
      "learning_rate": 1.0733099209833188e-05,
      "loss": 0.0,
      "step": 5367
    },
    {
      "epoch": 1.5709686859818555,
      "grad_norm": 0.001766446977853775,
      "learning_rate": 1.0725782850453616e-05,
      "loss": 0.0,
      "step": 5368
    },
    {
      "epoch": 1.5712613403570383,
      "grad_norm": 0.0005354926688596606,
      "learning_rate": 1.0718466491074042e-05,
      "loss": 0.0,
      "step": 5369
    },
    {
      "epoch": 1.5715539947322212,
      "grad_norm": 0.0015676389448344707,
      "learning_rate": 1.071115013169447e-05,
      "loss": 0.0,
      "step": 5370
    },
    {
      "epoch": 1.5718466491074041,
      "grad_norm": 0.0008060939144343138,
      "learning_rate": 1.0703833772314896e-05,
      "loss": 0.0,
      "step": 5371
    },
    {
      "epoch": 1.572139303482587,
      "grad_norm": 0.0022210762836039066,
      "learning_rate": 1.0696517412935324e-05,
      "loss": 0.0,
      "step": 5372
    },
    {
      "epoch": 1.57243195785777,
      "grad_norm": 0.0008389541762880981,
      "learning_rate": 1.0689201053555752e-05,
      "loss": 0.0,
      "step": 5373
    },
    {
      "epoch": 1.5727246122329528,
      "grad_norm": 0.0008097761892713606,
      "learning_rate": 1.0681884694176179e-05,
      "loss": 0.0,
      "step": 5374
    },
    {
      "epoch": 1.5730172666081357,
      "grad_norm": 0.00037051280378364027,
      "learning_rate": 1.0674568334796606e-05,
      "loss": 0.0,
      "step": 5375
    },
    {
      "epoch": 1.5733099209833186,
      "grad_norm": 0.0004782450560014695,
      "learning_rate": 1.0667251975417033e-05,
      "loss": 0.0,
      "step": 5376
    },
    {
      "epoch": 1.5736025753585015,
      "grad_norm": 0.0011488194577395916,
      "learning_rate": 1.065993561603746e-05,
      "loss": 0.0,
      "step": 5377
    },
    {
      "epoch": 1.5738952297336846,
      "grad_norm": 0.0013666459126397967,
      "learning_rate": 1.0652619256657887e-05,
      "loss": 0.0,
      "step": 5378
    },
    {
      "epoch": 1.5741878841088675,
      "grad_norm": 0.000563440378755331,
      "learning_rate": 1.0645302897278315e-05,
      "loss": 0.0,
      "step": 5379
    },
    {
      "epoch": 1.5744805384840503,
      "grad_norm": 0.013980953954160213,
      "learning_rate": 1.0637986537898743e-05,
      "loss": 0.0001,
      "step": 5380
    },
    {
      "epoch": 1.5747731928592332,
      "grad_norm": 0.0008600152796134353,
      "learning_rate": 1.0630670178519169e-05,
      "loss": 0.0,
      "step": 5381
    },
    {
      "epoch": 1.5750658472344161,
      "grad_norm": 0.0011449077865108848,
      "learning_rate": 1.0623353819139597e-05,
      "loss": 0.0,
      "step": 5382
    },
    {
      "epoch": 1.5753585016095992,
      "grad_norm": 0.0003296992217656225,
      "learning_rate": 1.0616037459760023e-05,
      "loss": 0.0,
      "step": 5383
    },
    {
      "epoch": 1.5756511559847821,
      "grad_norm": 0.0017691970570012927,
      "learning_rate": 1.0608721100380451e-05,
      "loss": 0.0,
      "step": 5384
    },
    {
      "epoch": 1.575943810359965,
      "grad_norm": 0.0010965262772515416,
      "learning_rate": 1.0601404741000878e-05,
      "loss": 0.0,
      "step": 5385
    },
    {
      "epoch": 1.576236464735148,
      "grad_norm": 0.0016302278963848948,
      "learning_rate": 1.0594088381621305e-05,
      "loss": 0.0,
      "step": 5386
    },
    {
      "epoch": 1.5765291191103308,
      "grad_norm": 0.0007256264798343182,
      "learning_rate": 1.0586772022241732e-05,
      "loss": 0.0,
      "step": 5387
    },
    {
      "epoch": 1.5768217734855137,
      "grad_norm": 0.0013906812528148293,
      "learning_rate": 1.057945566286216e-05,
      "loss": 0.0,
      "step": 5388
    },
    {
      "epoch": 1.5771144278606966,
      "grad_norm": 0.013793290592730045,
      "learning_rate": 1.0572139303482588e-05,
      "loss": 0.0001,
      "step": 5389
    },
    {
      "epoch": 1.5774070822358794,
      "grad_norm": 0.0006986415246501565,
      "learning_rate": 1.0564822944103014e-05,
      "loss": 0.0,
      "step": 5390
    },
    {
      "epoch": 1.5776997366110623,
      "grad_norm": 3.0798137187957764,
      "learning_rate": 1.0557506584723442e-05,
      "loss": 0.2331,
      "step": 5391
    },
    {
      "epoch": 1.5779923909862452,
      "grad_norm": 0.0024522338062524796,
      "learning_rate": 1.055019022534387e-05,
      "loss": 0.0,
      "step": 5392
    },
    {
      "epoch": 1.578285045361428,
      "grad_norm": 0.0007080997456796467,
      "learning_rate": 1.0542873865964296e-05,
      "loss": 0.0,
      "step": 5393
    },
    {
      "epoch": 1.578577699736611,
      "grad_norm": 0.0005693368148058653,
      "learning_rate": 1.0535557506584724e-05,
      "loss": 0.0,
      "step": 5394
    },
    {
      "epoch": 1.5788703541117939,
      "grad_norm": 0.0035647971089929342,
      "learning_rate": 1.052824114720515e-05,
      "loss": 0.0001,
      "step": 5395
    },
    {
      "epoch": 1.5791630084869768,
      "grad_norm": 0.0051018777303397655,
      "learning_rate": 1.0520924787825578e-05,
      "loss": 0.0001,
      "step": 5396
    },
    {
      "epoch": 1.5794556628621597,
      "grad_norm": 0.01608877442777157,
      "learning_rate": 1.0513608428446006e-05,
      "loss": 0.0003,
      "step": 5397
    },
    {
      "epoch": 1.5797483172373425,
      "grad_norm": 0.024239428341388702,
      "learning_rate": 1.0506292069066432e-05,
      "loss": 0.0003,
      "step": 5398
    },
    {
      "epoch": 1.5800409716125257,
      "grad_norm": 0.023922836408019066,
      "learning_rate": 1.049897570968686e-05,
      "loss": 0.0004,
      "step": 5399
    },
    {
      "epoch": 1.5803336259877085,
      "grad_norm": 0.040479667484760284,
      "learning_rate": 1.0491659350307288e-05,
      "loss": 0.0007,
      "step": 5400
    },
    {
      "epoch": 1.5806262803628914,
      "grad_norm": 0.026322608813643456,
      "learning_rate": 1.0484342990927715e-05,
      "loss": 0.0005,
      "step": 5401
    },
    {
      "epoch": 1.5809189347380743,
      "grad_norm": 0.016565097495913506,
      "learning_rate": 1.0477026631548143e-05,
      "loss": 0.0003,
      "step": 5402
    },
    {
      "epoch": 1.5812115891132572,
      "grad_norm": 0.014207043685019016,
      "learning_rate": 1.0469710272168569e-05,
      "loss": 0.0002,
      "step": 5403
    },
    {
      "epoch": 1.58150424348844,
      "grad_norm": 4.747661590576172,
      "learning_rate": 1.0462393912788997e-05,
      "loss": 0.1737,
      "step": 5404
    },
    {
      "epoch": 1.5817968978636232,
      "grad_norm": 0.03960366174578667,
      "learning_rate": 1.0455077553409425e-05,
      "loss": 0.0007,
      "step": 5405
    },
    {
      "epoch": 1.582089552238806,
      "grad_norm": 0.053216978907585144,
      "learning_rate": 1.0447761194029851e-05,
      "loss": 0.001,
      "step": 5406
    },
    {
      "epoch": 1.582382206613989,
      "grad_norm": 0.01265775691717863,
      "learning_rate": 1.0440444834650279e-05,
      "loss": 0.0002,
      "step": 5407
    },
    {
      "epoch": 1.5826748609891719,
      "grad_norm": 0.01505838893353939,
      "learning_rate": 1.0433128475270707e-05,
      "loss": 0.0003,
      "step": 5408
    },
    {
      "epoch": 1.5829675153643548,
      "grad_norm": 0.01124452706426382,
      "learning_rate": 1.0425812115891133e-05,
      "loss": 0.0003,
      "step": 5409
    },
    {
      "epoch": 1.5832601697395376,
      "grad_norm": 0.015584814362227917,
      "learning_rate": 1.0418495756511561e-05,
      "loss": 0.0003,
      "step": 5410
    },
    {
      "epoch": 1.5835528241147205,
      "grad_norm": 0.01889246329665184,
      "learning_rate": 1.0411179397131987e-05,
      "loss": 0.0004,
      "step": 5411
    },
    {
      "epoch": 1.5838454784899034,
      "grad_norm": 0.0218829195946455,
      "learning_rate": 1.0403863037752415e-05,
      "loss": 0.0005,
      "step": 5412
    },
    {
      "epoch": 1.5841381328650863,
      "grad_norm": 0.005514830816537142,
      "learning_rate": 1.0396546678372843e-05,
      "loss": 0.0001,
      "step": 5413
    },
    {
      "epoch": 1.5844307872402692,
      "grad_norm": 0.36543434858322144,
      "learning_rate": 1.038923031899327e-05,
      "loss": 0.0017,
      "step": 5414
    },
    {
      "epoch": 1.584723441615452,
      "grad_norm": 0.00626228516921401,
      "learning_rate": 1.0381913959613697e-05,
      "loss": 0.0001,
      "step": 5415
    },
    {
      "epoch": 1.585016095990635,
      "grad_norm": 0.007346426136791706,
      "learning_rate": 1.0374597600234124e-05,
      "loss": 0.0001,
      "step": 5416
    },
    {
      "epoch": 1.5853087503658179,
      "grad_norm": 0.0025608576834201813,
      "learning_rate": 1.0367281240854552e-05,
      "loss": 0.0001,
      "step": 5417
    },
    {
      "epoch": 1.5856014047410008,
      "grad_norm": 0.0082298768684268,
      "learning_rate": 1.035996488147498e-05,
      "loss": 0.0002,
      "step": 5418
    },
    {
      "epoch": 1.5858940591161836,
      "grad_norm": 0.004710091277956963,
      "learning_rate": 1.0352648522095406e-05,
      "loss": 0.0001,
      "step": 5419
    },
    {
      "epoch": 1.5861867134913668,
      "grad_norm": 0.007210554555058479,
      "learning_rate": 1.0345332162715834e-05,
      "loss": 0.0002,
      "step": 5420
    },
    {
      "epoch": 1.5864793678665496,
      "grad_norm": 0.010958317667245865,
      "learning_rate": 1.0338015803336262e-05,
      "loss": 0.0003,
      "step": 5421
    },
    {
      "epoch": 1.5867720222417325,
      "grad_norm": 0.011359517462551594,
      "learning_rate": 1.0330699443956688e-05,
      "loss": 0.0002,
      "step": 5422
    },
    {
      "epoch": 1.5870646766169154,
      "grad_norm": 0.003848788794130087,
      "learning_rate": 1.0323383084577116e-05,
      "loss": 0.0001,
      "step": 5423
    },
    {
      "epoch": 1.5873573309920983,
      "grad_norm": 0.012078369036316872,
      "learning_rate": 1.0316066725197542e-05,
      "loss": 0.0002,
      "step": 5424
    },
    {
      "epoch": 1.5876499853672812,
      "grad_norm": 0.007848396897315979,
      "learning_rate": 1.030875036581797e-05,
      "loss": 0.0002,
      "step": 5425
    },
    {
      "epoch": 1.5879426397424643,
      "grad_norm": 0.009114718064665794,
      "learning_rate": 1.0301434006438396e-05,
      "loss": 0.0002,
      "step": 5426
    },
    {
      "epoch": 1.5882352941176472,
      "grad_norm": 0.001855672337114811,
      "learning_rate": 1.0294117647058824e-05,
      "loss": 0.0,
      "step": 5427
    },
    {
      "epoch": 1.58852794849283,
      "grad_norm": 0.0008037255611270666,
      "learning_rate": 1.028680128767925e-05,
      "loss": 0.0,
      "step": 5428
    },
    {
      "epoch": 1.588820602868013,
      "grad_norm": 0.004607463255524635,
      "learning_rate": 1.0279484928299679e-05,
      "loss": 0.0001,
      "step": 5429
    },
    {
      "epoch": 1.5891132572431959,
      "grad_norm": 0.02064778096973896,
      "learning_rate": 1.0272168568920105e-05,
      "loss": 0.0003,
      "step": 5430
    },
    {
      "epoch": 1.5894059116183787,
      "grad_norm": 0.007519340142607689,
      "learning_rate": 1.0264852209540533e-05,
      "loss": 0.0002,
      "step": 5431
    },
    {
      "epoch": 1.5896985659935616,
      "grad_norm": 0.004054190590977669,
      "learning_rate": 1.0257535850160959e-05,
      "loss": 0.0001,
      "step": 5432
    },
    {
      "epoch": 1.5899912203687445,
      "grad_norm": 0.007159278728067875,
      "learning_rate": 1.0250219490781387e-05,
      "loss": 0.0001,
      "step": 5433
    },
    {
      "epoch": 1.5902838747439274,
      "grad_norm": 0.005060483701527119,
      "learning_rate": 1.0242903131401815e-05,
      "loss": 0.0001,
      "step": 5434
    },
    {
      "epoch": 1.5905765291191103,
      "grad_norm": 0.004195989575237036,
      "learning_rate": 1.0235586772022241e-05,
      "loss": 0.0001,
      "step": 5435
    },
    {
      "epoch": 1.5908691834942932,
      "grad_norm": 0.004533226601779461,
      "learning_rate": 1.0228270412642669e-05,
      "loss": 0.0001,
      "step": 5436
    },
    {
      "epoch": 1.591161837869476,
      "grad_norm": 0.00588589021936059,
      "learning_rate": 1.0220954053263097e-05,
      "loss": 0.0001,
      "step": 5437
    },
    {
      "epoch": 1.591454492244659,
      "grad_norm": 0.004369951784610748,
      "learning_rate": 1.0213637693883523e-05,
      "loss": 0.0001,
      "step": 5438
    },
    {
      "epoch": 1.5917471466198418,
      "grad_norm": 0.020003627985715866,
      "learning_rate": 1.0206321334503951e-05,
      "loss": 0.0003,
      "step": 5439
    },
    {
      "epoch": 1.5920398009950247,
      "grad_norm": 0.004071712959557772,
      "learning_rate": 1.0199004975124378e-05,
      "loss": 0.0001,
      "step": 5440
    },
    {
      "epoch": 1.5923324553702076,
      "grad_norm": 0.005775037687271833,
      "learning_rate": 1.0191688615744805e-05,
      "loss": 0.0002,
      "step": 5441
    },
    {
      "epoch": 1.5926251097453907,
      "grad_norm": 0.0015077510615810752,
      "learning_rate": 1.0184372256365233e-05,
      "loss": 0.0,
      "step": 5442
    },
    {
      "epoch": 1.5929177641205736,
      "grad_norm": 0.005627945531159639,
      "learning_rate": 1.017705589698566e-05,
      "loss": 0.0001,
      "step": 5443
    },
    {
      "epoch": 1.5932104184957565,
      "grad_norm": 0.3537636697292328,
      "learning_rate": 1.0169739537606088e-05,
      "loss": 0.0017,
      "step": 5444
    },
    {
      "epoch": 1.5935030728709394,
      "grad_norm": 0.021307745948433876,
      "learning_rate": 1.0162423178226516e-05,
      "loss": 0.0003,
      "step": 5445
    },
    {
      "epoch": 1.5937957272461223,
      "grad_norm": 0.004862764850258827,
      "learning_rate": 1.0155106818846942e-05,
      "loss": 0.0001,
      "step": 5446
    },
    {
      "epoch": 1.5940883816213054,
      "grad_norm": 0.001179985934868455,
      "learning_rate": 1.014779045946737e-05,
      "loss": 0.0,
      "step": 5447
    },
    {
      "epoch": 1.5943810359964883,
      "grad_norm": 0.004859209526330233,
      "learning_rate": 1.0140474100087796e-05,
      "loss": 0.0001,
      "step": 5448
    },
    {
      "epoch": 1.5946736903716712,
      "grad_norm": 0.0020784807857125998,
      "learning_rate": 1.0133157740708224e-05,
      "loss": 0.0,
      "step": 5449
    },
    {
      "epoch": 1.594966344746854,
      "grad_norm": 0.005648862570524216,
      "learning_rate": 1.0125841381328652e-05,
      "loss": 0.0001,
      "step": 5450
    },
    {
      "epoch": 1.595258999122037,
      "grad_norm": 0.0021079182624816895,
      "learning_rate": 1.0118525021949078e-05,
      "loss": 0.0,
      "step": 5451
    },
    {
      "epoch": 1.5955516534972198,
      "grad_norm": 0.0028216817881911993,
      "learning_rate": 1.0111208662569506e-05,
      "loss": 0.0001,
      "step": 5452
    },
    {
      "epoch": 1.5958443078724027,
      "grad_norm": 0.002231398830190301,
      "learning_rate": 1.0103892303189934e-05,
      "loss": 0.0,
      "step": 5453
    },
    {
      "epoch": 1.5961369622475856,
      "grad_norm": 0.0026541994884610176,
      "learning_rate": 1.009657594381036e-05,
      "loss": 0.0001,
      "step": 5454
    },
    {
      "epoch": 1.5964296166227685,
      "grad_norm": 11.862154006958008,
      "learning_rate": 1.0089259584430788e-05,
      "loss": 0.1116,
      "step": 5455
    },
    {
      "epoch": 1.5967222709979514,
      "grad_norm": 0.0055429027415812016,
      "learning_rate": 1.0081943225051215e-05,
      "loss": 0.0001,
      "step": 5456
    },
    {
      "epoch": 1.5970149253731343,
      "grad_norm": 0.015475308522582054,
      "learning_rate": 1.0074626865671643e-05,
      "loss": 0.0002,
      "step": 5457
    },
    {
      "epoch": 1.5973075797483172,
      "grad_norm": 0.006429288536310196,
      "learning_rate": 1.006731050629207e-05,
      "loss": 0.0001,
      "step": 5458
    },
    {
      "epoch": 1.5976002341235,
      "grad_norm": 0.0036033045034855604,
      "learning_rate": 1.0059994146912497e-05,
      "loss": 0.0001,
      "step": 5459
    },
    {
      "epoch": 1.597892888498683,
      "grad_norm": 0.006906921975314617,
      "learning_rate": 1.0052677787532925e-05,
      "loss": 0.0001,
      "step": 5460
    },
    {
      "epoch": 1.5981855428738658,
      "grad_norm": 0.0003840086574200541,
      "learning_rate": 1.0045361428153353e-05,
      "loss": 0.0,
      "step": 5461
    },
    {
      "epoch": 1.5984781972490487,
      "grad_norm": 0.0009076825808733702,
      "learning_rate": 1.0038045068773779e-05,
      "loss": 0.0,
      "step": 5462
    },
    {
      "epoch": 1.5987708516242318,
      "grad_norm": 0.0018735651392489672,
      "learning_rate": 1.0030728709394207e-05,
      "loss": 0.0001,
      "step": 5463
    },
    {
      "epoch": 1.5990635059994147,
      "grad_norm": 0.004747231025248766,
      "learning_rate": 1.0023412350014633e-05,
      "loss": 0.0,
      "step": 5464
    },
    {
      "epoch": 1.5993561603745976,
      "grad_norm": 0.007176742423325777,
      "learning_rate": 1.0016095990635061e-05,
      "loss": 0.0001,
      "step": 5465
    },
    {
      "epoch": 1.5996488147497805,
      "grad_norm": 0.004211073741316795,
      "learning_rate": 1.0008779631255489e-05,
      "loss": 0.0001,
      "step": 5466
    },
    {
      "epoch": 1.5999414691249634,
      "grad_norm": 0.004269284196197987,
      "learning_rate": 1.0001463271875915e-05,
      "loss": 0.0001,
      "step": 5467
    },
    {
      "epoch": 1.6002341235001465,
      "grad_norm": 0.0071305325254797935,
      "learning_rate": 9.994146912496343e-06,
      "loss": 0.0001,
      "step": 5468
    },
    {
      "epoch": 1.6005267778753294,
      "grad_norm": 0.01426814403384924,
      "learning_rate": 9.98683055311677e-06,
      "loss": 0.0002,
      "step": 5469
    },
    {
      "epoch": 1.6008194322505123,
      "grad_norm": 0.0011637783609330654,
      "learning_rate": 9.979514193737197e-06,
      "loss": 0.0,
      "step": 5470
    },
    {
      "epoch": 1.6011120866256952,
      "grad_norm": 0.0064765228889882565,
      "learning_rate": 9.972197834357624e-06,
      "loss": 0.0001,
      "step": 5471
    },
    {
      "epoch": 1.601404741000878,
      "grad_norm": 0.005298473406583071,
      "learning_rate": 9.964881474978052e-06,
      "loss": 0.0001,
      "step": 5472
    },
    {
      "epoch": 1.601697395376061,
      "grad_norm": 0.6955668926239014,
      "learning_rate": 9.957565115598478e-06,
      "loss": 0.002,
      "step": 5473
    },
    {
      "epoch": 1.6019900497512438,
      "grad_norm": 0.0010658545652404428,
      "learning_rate": 9.950248756218906e-06,
      "loss": 0.0,
      "step": 5474
    },
    {
      "epoch": 1.6022827041264267,
      "grad_norm": 0.0014335220912471414,
      "learning_rate": 9.942932396839332e-06,
      "loss": 0.0,
      "step": 5475
    },
    {
      "epoch": 1.6025753585016096,
      "grad_norm": 0.0029462689999490976,
      "learning_rate": 9.93561603745976e-06,
      "loss": 0.0001,
      "step": 5476
    },
    {
      "epoch": 1.6028680128767925,
      "grad_norm": 0.0032651119399815798,
      "learning_rate": 9.928299678080188e-06,
      "loss": 0.0001,
      "step": 5477
    },
    {
      "epoch": 1.6031606672519754,
      "grad_norm": 0.004083502572029829,
      "learning_rate": 9.920983318700614e-06,
      "loss": 0.0001,
      "step": 5478
    },
    {
      "epoch": 1.6034533216271583,
      "grad_norm": 0.0063823312520980835,
      "learning_rate": 9.913666959321042e-06,
      "loss": 0.0002,
      "step": 5479
    },
    {
      "epoch": 1.6037459760023411,
      "grad_norm": 0.0018457062542438507,
      "learning_rate": 9.906350599941468e-06,
      "loss": 0.0,
      "step": 5480
    },
    {
      "epoch": 1.604038630377524,
      "grad_norm": 0.0017032860778272152,
      "learning_rate": 9.899034240561896e-06,
      "loss": 0.0,
      "step": 5481
    },
    {
      "epoch": 1.604331284752707,
      "grad_norm": 0.0014460444217547774,
      "learning_rate": 9.891717881182324e-06,
      "loss": 0.0,
      "step": 5482
    },
    {
      "epoch": 1.6046239391278898,
      "grad_norm": 0.002544417278841138,
      "learning_rate": 9.88440152180275e-06,
      "loss": 0.0001,
      "step": 5483
    },
    {
      "epoch": 1.604916593503073,
      "grad_norm": 0.003711249679327011,
      "learning_rate": 9.877085162423179e-06,
      "loss": 0.0001,
      "step": 5484
    },
    {
      "epoch": 1.6052092478782558,
      "grad_norm": 0.0012813995126634836,
      "learning_rate": 9.869768803043605e-06,
      "loss": 0.0,
      "step": 5485
    },
    {
      "epoch": 1.6055019022534387,
      "grad_norm": 0.01935703307390213,
      "learning_rate": 9.862452443664033e-06,
      "loss": 0.0001,
      "step": 5486
    },
    {
      "epoch": 1.6057945566286216,
      "grad_norm": 0.003095586085692048,
      "learning_rate": 9.85513608428446e-06,
      "loss": 0.0,
      "step": 5487
    },
    {
      "epoch": 1.6060872110038045,
      "grad_norm": 0.0031561930663883686,
      "learning_rate": 9.847819724904887e-06,
      "loss": 0.0001,
      "step": 5488
    },
    {
      "epoch": 1.6063798653789876,
      "grad_norm": 1.153779149055481,
      "learning_rate": 9.840503365525315e-06,
      "loss": 0.0061,
      "step": 5489
    },
    {
      "epoch": 1.6066725197541705,
      "grad_norm": 0.0033556653652340174,
      "learning_rate": 9.833187006145743e-06,
      "loss": 0.0001,
      "step": 5490
    },
    {
      "epoch": 1.6069651741293534,
      "grad_norm": 0.0040247077122330666,
      "learning_rate": 9.825870646766169e-06,
      "loss": 0.0001,
      "step": 5491
    },
    {
      "epoch": 1.6072578285045362,
      "grad_norm": 0.005303109996020794,
      "learning_rate": 9.818554287386597e-06,
      "loss": 0.0001,
      "step": 5492
    },
    {
      "epoch": 1.6075504828797191,
      "grad_norm": 0.0048234895803034306,
      "learning_rate": 9.811237928007023e-06,
      "loss": 0.0001,
      "step": 5493
    },
    {
      "epoch": 1.607843137254902,
      "grad_norm": 0.0017658272990956903,
      "learning_rate": 9.803921568627451e-06,
      "loss": 0.0,
      "step": 5494
    },
    {
      "epoch": 1.608135791630085,
      "grad_norm": 0.0017520791152492166,
      "learning_rate": 9.79660520924788e-06,
      "loss": 0.0,
      "step": 5495
    },
    {
      "epoch": 1.6084284460052678,
      "grad_norm": 0.0030433055944740772,
      "learning_rate": 9.789288849868305e-06,
      "loss": 0.0,
      "step": 5496
    },
    {
      "epoch": 1.6087211003804507,
      "grad_norm": 0.0010577259818091989,
      "learning_rate": 9.781972490488733e-06,
      "loss": 0.0,
      "step": 5497
    },
    {
      "epoch": 1.6090137547556336,
      "grad_norm": 0.0006441577570512891,
      "learning_rate": 9.774656131109161e-06,
      "loss": 0.0,
      "step": 5498
    },
    {
      "epoch": 1.6093064091308165,
      "grad_norm": 0.0019147149287164211,
      "learning_rate": 9.767339771729588e-06,
      "loss": 0.0,
      "step": 5499
    },
    {
      "epoch": 1.6095990635059994,
      "grad_norm": 0.002758044982329011,
      "learning_rate": 9.760023412350016e-06,
      "loss": 0.0,
      "step": 5500
    },
    {
      "epoch": 1.6098917178811822,
      "grad_norm": 0.0017483183182775974,
      "learning_rate": 9.752707052970442e-06,
      "loss": 0.0,
      "step": 5501
    },
    {
      "epoch": 1.6101843722563651,
      "grad_norm": 0.07355238497257233,
      "learning_rate": 9.74539069359087e-06,
      "loss": 0.0004,
      "step": 5502
    },
    {
      "epoch": 1.610477026631548,
      "grad_norm": 0.004786691628396511,
      "learning_rate": 9.738074334211298e-06,
      "loss": 0.0,
      "step": 5503
    },
    {
      "epoch": 1.610769681006731,
      "grad_norm": 0.0004528155841398984,
      "learning_rate": 9.730757974831724e-06,
      "loss": 0.0,
      "step": 5504
    },
    {
      "epoch": 1.611062335381914,
      "grad_norm": 0.0020629502832889557,
      "learning_rate": 9.723441615452152e-06,
      "loss": 0.0,
      "step": 5505
    },
    {
      "epoch": 1.611354989757097,
      "grad_norm": 0.0018566844519227743,
      "learning_rate": 9.71612525607258e-06,
      "loss": 0.0,
      "step": 5506
    },
    {
      "epoch": 1.6116476441322798,
      "grad_norm": 0.0012614359147846699,
      "learning_rate": 9.708808896693006e-06,
      "loss": 0.0,
      "step": 5507
    },
    {
      "epoch": 1.6119402985074627,
      "grad_norm": 0.009515652433037758,
      "learning_rate": 9.701492537313434e-06,
      "loss": 0.0001,
      "step": 5508
    },
    {
      "epoch": 1.6122329528826456,
      "grad_norm": 0.001026952755637467,
      "learning_rate": 9.69417617793386e-06,
      "loss": 0.0,
      "step": 5509
    },
    {
      "epoch": 1.6125256072578285,
      "grad_norm": 0.0011891749454662204,
      "learning_rate": 9.686859818554288e-06,
      "loss": 0.0,
      "step": 5510
    },
    {
      "epoch": 1.6128182616330116,
      "grad_norm": 0.003185735549777746,
      "learning_rate": 9.679543459174716e-06,
      "loss": 0.0,
      "step": 5511
    },
    {
      "epoch": 1.6131109160081945,
      "grad_norm": 0.0013077668845653534,
      "learning_rate": 9.672227099795142e-06,
      "loss": 0.0,
      "step": 5512
    },
    {
      "epoch": 1.6134035703833773,
      "grad_norm": 0.0011455434141680598,
      "learning_rate": 9.66491074041557e-06,
      "loss": 0.0,
      "step": 5513
    },
    {
      "epoch": 1.6136962247585602,
      "grad_norm": 0.0009633139125071466,
      "learning_rate": 9.657594381035998e-06,
      "loss": 0.0,
      "step": 5514
    },
    {
      "epoch": 1.6139888791337431,
      "grad_norm": 0.005264429375529289,
      "learning_rate": 9.650278021656425e-06,
      "loss": 0.0,
      "step": 5515
    },
    {
      "epoch": 1.614281533508926,
      "grad_norm": 0.008792583830654621,
      "learning_rate": 9.642961662276853e-06,
      "loss": 0.0001,
      "step": 5516
    },
    {
      "epoch": 1.614574187884109,
      "grad_norm": 0.0020714502315968275,
      "learning_rate": 9.635645302897279e-06,
      "loss": 0.0,
      "step": 5517
    },
    {
      "epoch": 1.6148668422592918,
      "grad_norm": 0.0029987585730850697,
      "learning_rate": 9.628328943517707e-06,
      "loss": 0.0,
      "step": 5518
    },
    {
      "epoch": 1.6151594966344747,
      "grad_norm": 0.0023104194551706314,
      "learning_rate": 9.621012584138133e-06,
      "loss": 0.0,
      "step": 5519
    },
    {
      "epoch": 1.6154521510096576,
      "grad_norm": 0.008713692426681519,
      "learning_rate": 9.613696224758561e-06,
      "loss": 0.0001,
      "step": 5520
    },
    {
      "epoch": 1.6157448053848404,
      "grad_norm": 0.000451488682301715,
      "learning_rate": 9.606379865378987e-06,
      "loss": 0.0,
      "step": 5521
    },
    {
      "epoch": 1.6160374597600233,
      "grad_norm": 0.010159490630030632,
      "learning_rate": 9.599063505999415e-06,
      "loss": 0.0001,
      "step": 5522
    },
    {
      "epoch": 1.6163301141352062,
      "grad_norm": 0.0007553183240815997,
      "learning_rate": 9.591747146619841e-06,
      "loss": 0.0,
      "step": 5523
    },
    {
      "epoch": 1.616622768510389,
      "grad_norm": 0.513761579990387,
      "learning_rate": 9.58443078724027e-06,
      "loss": 0.0037,
      "step": 5524
    },
    {
      "epoch": 1.616915422885572,
      "grad_norm": 0.0022291538771241903,
      "learning_rate": 9.577114427860696e-06,
      "loss": 0.0,
      "step": 5525
    },
    {
      "epoch": 1.6172080772607549,
      "grad_norm": 0.10149343311786652,
      "learning_rate": 9.569798068481124e-06,
      "loss": 0.0003,
      "step": 5526
    },
    {
      "epoch": 1.617500731635938,
      "grad_norm": 0.0024327621795237064,
      "learning_rate": 9.562481709101552e-06,
      "loss": 0.0,
      "step": 5527
    },
    {
      "epoch": 1.6177933860111209,
      "grad_norm": 0.0017709648236632347,
      "learning_rate": 9.555165349721978e-06,
      "loss": 0.0,
      "step": 5528
    },
    {
      "epoch": 1.6180860403863038,
      "grad_norm": 0.0004216205852571875,
      "learning_rate": 9.547848990342406e-06,
      "loss": 0.0,
      "step": 5529
    },
    {
      "epoch": 1.6183786947614867,
      "grad_norm": 0.06876334547996521,
      "learning_rate": 9.540532630962834e-06,
      "loss": 0.0004,
      "step": 5530
    },
    {
      "epoch": 1.6186713491366695,
      "grad_norm": 0.002134463516995311,
      "learning_rate": 9.53321627158326e-06,
      "loss": 0.0,
      "step": 5531
    },
    {
      "epoch": 1.6189640035118527,
      "grad_norm": 0.008550962433218956,
      "learning_rate": 9.525899912203688e-06,
      "loss": 0.0001,
      "step": 5532
    },
    {
      "epoch": 1.6192566578870355,
      "grad_norm": 0.0006103214109316468,
      "learning_rate": 9.518583552824114e-06,
      "loss": 0.0,
      "step": 5533
    },
    {
      "epoch": 1.6195493122622184,
      "grad_norm": 0.0005587683990597725,
      "learning_rate": 9.511267193444542e-06,
      "loss": 0.0,
      "step": 5534
    },
    {
      "epoch": 1.6198419666374013,
      "grad_norm": 0.0008965490851551294,
      "learning_rate": 9.50395083406497e-06,
      "loss": 0.0,
      "step": 5535
    },
    {
      "epoch": 1.6201346210125842,
      "grad_norm": 0.0025804329197853804,
      "learning_rate": 9.496634474685396e-06,
      "loss": 0.0,
      "step": 5536
    },
    {
      "epoch": 1.620427275387767,
      "grad_norm": 0.0016825495986267924,
      "learning_rate": 9.489318115305824e-06,
      "loss": 0.0,
      "step": 5537
    },
    {
      "epoch": 1.62071992976295,
      "grad_norm": 0.001002712408080697,
      "learning_rate": 9.482001755926252e-06,
      "loss": 0.0,
      "step": 5538
    },
    {
      "epoch": 1.6210125841381329,
      "grad_norm": 0.0002484250580891967,
      "learning_rate": 9.474685396546679e-06,
      "loss": 0.0,
      "step": 5539
    },
    {
      "epoch": 1.6213052385133158,
      "grad_norm": 0.0004409311804920435,
      "learning_rate": 9.467369037167106e-06,
      "loss": 0.0,
      "step": 5540
    },
    {
      "epoch": 1.6215978928884986,
      "grad_norm": 0.0027556063141673803,
      "learning_rate": 9.460052677787533e-06,
      "loss": 0.0,
      "step": 5541
    },
    {
      "epoch": 1.6218905472636815,
      "grad_norm": 0.0011143895098939538,
      "learning_rate": 9.45273631840796e-06,
      "loss": 0.0,
      "step": 5542
    },
    {
      "epoch": 1.6221832016388644,
      "grad_norm": 0.003866976359859109,
      "learning_rate": 9.445419959028389e-06,
      "loss": 0.0,
      "step": 5543
    },
    {
      "epoch": 1.6224758560140473,
      "grad_norm": 0.001845609163865447,
      "learning_rate": 9.438103599648815e-06,
      "loss": 0.0,
      "step": 5544
    },
    {
      "epoch": 1.6227685103892302,
      "grad_norm": 0.0027286650147289038,
      "learning_rate": 9.430787240269243e-06,
      "loss": 0.0001,
      "step": 5545
    },
    {
      "epoch": 1.623061164764413,
      "grad_norm": 0.0007557672797702253,
      "learning_rate": 9.423470880889669e-06,
      "loss": 0.0,
      "step": 5546
    },
    {
      "epoch": 1.623353819139596,
      "grad_norm": 0.0007909720879979432,
      "learning_rate": 9.416154521510097e-06,
      "loss": 0.0,
      "step": 5547
    },
    {
      "epoch": 1.623646473514779,
      "grad_norm": 0.0005950201884843409,
      "learning_rate": 9.408838162130525e-06,
      "loss": 0.0,
      "step": 5548
    },
    {
      "epoch": 1.623939127889962,
      "grad_norm": 0.0003656030457932502,
      "learning_rate": 9.401521802750951e-06,
      "loss": 0.0,
      "step": 5549
    },
    {
      "epoch": 1.6242317822651449,
      "grad_norm": 0.0009229332208633423,
      "learning_rate": 9.39420544337138e-06,
      "loss": 0.0,
      "step": 5550
    },
    {
      "epoch": 1.6245244366403278,
      "grad_norm": 0.0010801558382809162,
      "learning_rate": 9.386889083991807e-06,
      "loss": 0.0,
      "step": 5551
    },
    {
      "epoch": 1.6248170910155106,
      "grad_norm": 0.005564079154282808,
      "learning_rate": 9.379572724612233e-06,
      "loss": 0.0001,
      "step": 5552
    },
    {
      "epoch": 1.6251097453906937,
      "grad_norm": 0.0021888986229896545,
      "learning_rate": 9.372256365232661e-06,
      "loss": 0.0,
      "step": 5553
    },
    {
      "epoch": 1.6254023997658766,
      "grad_norm": 0.001076598186045885,
      "learning_rate": 9.364940005853088e-06,
      "loss": 0.0,
      "step": 5554
    },
    {
      "epoch": 1.6256950541410595,
      "grad_norm": 0.00043485217611305416,
      "learning_rate": 9.357623646473516e-06,
      "loss": 0.0,
      "step": 5555
    },
    {
      "epoch": 1.6259877085162424,
      "grad_norm": 0.00046464326442219317,
      "learning_rate": 9.350307287093944e-06,
      "loss": 0.0,
      "step": 5556
    },
    {
      "epoch": 1.6262803628914253,
      "grad_norm": 0.00032036672928370535,
      "learning_rate": 9.34299092771437e-06,
      "loss": 0.0,
      "step": 5557
    },
    {
      "epoch": 1.6265730172666082,
      "grad_norm": 0.0007653280044905841,
      "learning_rate": 9.335674568334798e-06,
      "loss": 0.0,
      "step": 5558
    },
    {
      "epoch": 1.626865671641791,
      "grad_norm": 0.0007247430039569736,
      "learning_rate": 9.328358208955226e-06,
      "loss": 0.0,
      "step": 5559
    },
    {
      "epoch": 1.627158326016974,
      "grad_norm": 0.000609236944001168,
      "learning_rate": 9.321041849575652e-06,
      "loss": 0.0,
      "step": 5560
    },
    {
      "epoch": 1.6274509803921569,
      "grad_norm": 3.0711312294006348,
      "learning_rate": 9.31372549019608e-06,
      "loss": 0.0043,
      "step": 5561
    },
    {
      "epoch": 1.6277436347673397,
      "grad_norm": 0.0011066512670367956,
      "learning_rate": 9.306409130816506e-06,
      "loss": 0.0,
      "step": 5562
    },
    {
      "epoch": 1.6280362891425226,
      "grad_norm": 0.0012488741194829345,
      "learning_rate": 9.299092771436934e-06,
      "loss": 0.0,
      "step": 5563
    },
    {
      "epoch": 1.6283289435177055,
      "grad_norm": 0.0015597431920468807,
      "learning_rate": 9.29177641205736e-06,
      "loss": 0.0,
      "step": 5564
    },
    {
      "epoch": 1.6286215978928884,
      "grad_norm": 0.00021354974887799472,
      "learning_rate": 9.284460052677788e-06,
      "loss": 0.0,
      "step": 5565
    },
    {
      "epoch": 1.6289142522680713,
      "grad_norm": 0.001786278560757637,
      "learning_rate": 9.277143693298215e-06,
      "loss": 0.0,
      "step": 5566
    },
    {
      "epoch": 1.6292069066432542,
      "grad_norm": 4.217692852020264,
      "learning_rate": 9.269827333918642e-06,
      "loss": 0.1724,
      "step": 5567
    },
    {
      "epoch": 1.629499561018437,
      "grad_norm": 0.014737540856003761,
      "learning_rate": 9.262510974539069e-06,
      "loss": 0.0001,
      "step": 5568
    },
    {
      "epoch": 1.6297922153936202,
      "grad_norm": 0.031021222472190857,
      "learning_rate": 9.255194615159497e-06,
      "loss": 0.0002,
      "step": 5569
    },
    {
      "epoch": 1.630084869768803,
      "grad_norm": 0.002510768361389637,
      "learning_rate": 9.247878255779923e-06,
      "loss": 0.0,
      "step": 5570
    },
    {
      "epoch": 1.630377524143986,
      "grad_norm": 0.012060116045176983,
      "learning_rate": 9.240561896400351e-06,
      "loss": 0.0001,
      "step": 5571
    },
    {
      "epoch": 1.6306701785191688,
      "grad_norm": 0.00570687185972929,
      "learning_rate": 9.233245537020779e-06,
      "loss": 0.0001,
      "step": 5572
    },
    {
      "epoch": 1.6309628328943517,
      "grad_norm": 0.0003556690935511142,
      "learning_rate": 9.225929177641205e-06,
      "loss": 0.0,
      "step": 5573
    },
    {
      "epoch": 1.6312554872695348,
      "grad_norm": 26.108539581298828,
      "learning_rate": 9.218612818261633e-06,
      "loss": 0.0918,
      "step": 5574
    },
    {
      "epoch": 1.6315481416447177,
      "grad_norm": 0.005321469157934189,
      "learning_rate": 9.211296458882061e-06,
      "loss": 0.0001,
      "step": 5575
    },
    {
      "epoch": 1.6318407960199006,
      "grad_norm": 0.0020039896480739117,
      "learning_rate": 9.203980099502487e-06,
      "loss": 0.0,
      "step": 5576
    },
    {
      "epoch": 1.6321334503950835,
      "grad_norm": 0.01536587718874216,
      "learning_rate": 9.196663740122915e-06,
      "loss": 0.0001,
      "step": 5577
    },
    {
      "epoch": 1.6324261047702664,
      "grad_norm": 0.005312270950525999,
      "learning_rate": 9.189347380743341e-06,
      "loss": 0.0001,
      "step": 5578
    },
    {
      "epoch": 1.6327187591454493,
      "grad_norm": 0.004251805599778891,
      "learning_rate": 9.18203102136377e-06,
      "loss": 0.0001,
      "step": 5579
    },
    {
      "epoch": 1.6330114135206322,
      "grad_norm": 0.05280857905745506,
      "learning_rate": 9.174714661984197e-06,
      "loss": 0.0003,
      "step": 5580
    },
    {
      "epoch": 1.633304067895815,
      "grad_norm": 0.0005378371570259333,
      "learning_rate": 9.167398302604624e-06,
      "loss": 0.0,
      "step": 5581
    },
    {
      "epoch": 1.633596722270998,
      "grad_norm": 0.14982321858406067,
      "learning_rate": 9.160081943225052e-06,
      "loss": 0.0015,
      "step": 5582
    },
    {
      "epoch": 1.6338893766461808,
      "grad_norm": 0.10131068527698517,
      "learning_rate": 9.15276558384548e-06,
      "loss": 0.0006,
      "step": 5583
    },
    {
      "epoch": 1.6341820310213637,
      "grad_norm": 0.011011369526386261,
      "learning_rate": 9.145449224465906e-06,
      "loss": 0.0001,
      "step": 5584
    },
    {
      "epoch": 1.6344746853965466,
      "grad_norm": 0.004358216188848019,
      "learning_rate": 9.138132865086334e-06,
      "loss": 0.0001,
      "step": 5585
    },
    {
      "epoch": 1.6347673397717295,
      "grad_norm": 0.004245396703481674,
      "learning_rate": 9.13081650570676e-06,
      "loss": 0.0001,
      "step": 5586
    },
    {
      "epoch": 1.6350599941469124,
      "grad_norm": 0.021265236660838127,
      "learning_rate": 9.123500146327188e-06,
      "loss": 0.0002,
      "step": 5587
    },
    {
      "epoch": 1.6353526485220953,
      "grad_norm": 0.004796027671545744,
      "learning_rate": 9.116183786947616e-06,
      "loss": 0.0001,
      "step": 5588
    },
    {
      "epoch": 1.6356453028972782,
      "grad_norm": 0.0007314679096452892,
      "learning_rate": 9.108867427568042e-06,
      "loss": 0.0,
      "step": 5589
    },
    {
      "epoch": 1.6359379572724613,
      "grad_norm": 0.008435488678514957,
      "learning_rate": 9.10155106818847e-06,
      "loss": 0.0001,
      "step": 5590
    },
    {
      "epoch": 1.6362306116476442,
      "grad_norm": 0.0021220894996076822,
      "learning_rate": 9.094234708808898e-06,
      "loss": 0.0,
      "step": 5591
    },
    {
      "epoch": 1.636523266022827,
      "grad_norm": 0.011719580739736557,
      "learning_rate": 9.086918349429324e-06,
      "loss": 0.0002,
      "step": 5592
    },
    {
      "epoch": 1.63681592039801,
      "grad_norm": 0.0012470482615754008,
      "learning_rate": 9.079601990049752e-06,
      "loss": 0.0,
      "step": 5593
    },
    {
      "epoch": 1.6371085747731928,
      "grad_norm": 6.902370929718018,
      "learning_rate": 9.072285630670179e-06,
      "loss": 0.0652,
      "step": 5594
    },
    {
      "epoch": 1.6374012291483757,
      "grad_norm": 0.11666715145111084,
      "learning_rate": 9.064969271290606e-06,
      "loss": 0.0018,
      "step": 5595
    },
    {
      "epoch": 1.6376938835235588,
      "grad_norm": 0.15053029358386993,
      "learning_rate": 9.057652911911034e-06,
      "loss": 0.0009,
      "step": 5596
    },
    {
      "epoch": 1.6379865378987417,
      "grad_norm": 0.0008611659286543727,
      "learning_rate": 9.05033655253146e-06,
      "loss": 0.0,
      "step": 5597
    },
    {
      "epoch": 1.6382791922739246,
      "grad_norm": 0.0003323563141748309,
      "learning_rate": 9.043020193151889e-06,
      "loss": 0.0,
      "step": 5598
    },
    {
      "epoch": 1.6385718466491075,
      "grad_norm": 19.84071922302246,
      "learning_rate": 9.035703833772317e-06,
      "loss": 0.0436,
      "step": 5599
    },
    {
      "epoch": 1.6388645010242904,
      "grad_norm": 0.0016030750703066587,
      "learning_rate": 9.028387474392743e-06,
      "loss": 0.0,
      "step": 5600
    },
    {
      "epoch": 1.6391571553994733,
      "grad_norm": 0.0007422008784487844,
      "learning_rate": 9.02107111501317e-06,
      "loss": 0.0,
      "step": 5601
    },
    {
      "epoch": 1.6394498097746562,
      "grad_norm": 0.0137802017852664,
      "learning_rate": 9.013754755633597e-06,
      "loss": 0.0001,
      "step": 5602
    },
    {
      "epoch": 1.639742464149839,
      "grad_norm": 0.004875142592936754,
      "learning_rate": 9.006438396254025e-06,
      "loss": 0.0001,
      "step": 5603
    },
    {
      "epoch": 1.640035118525022,
      "grad_norm": 0.017864851281046867,
      "learning_rate": 8.999122036874453e-06,
      "loss": 0.0001,
      "step": 5604
    },
    {
      "epoch": 1.6403277729002048,
      "grad_norm": 0.0029294146224856377,
      "learning_rate": 8.99180567749488e-06,
      "loss": 0.0,
      "step": 5605
    },
    {
      "epoch": 1.6406204272753877,
      "grad_norm": 0.05114160105586052,
      "learning_rate": 8.984489318115307e-06,
      "loss": 0.0001,
      "step": 5606
    },
    {
      "epoch": 1.6409130816505706,
      "grad_norm": 0.001088925520889461,
      "learning_rate": 8.977172958735733e-06,
      "loss": 0.0,
      "step": 5607
    },
    {
      "epoch": 1.6412057360257535,
      "grad_norm": 0.0010532621527090669,
      "learning_rate": 8.969856599356161e-06,
      "loss": 0.0,
      "step": 5608
    },
    {
      "epoch": 1.6414983904009364,
      "grad_norm": 0.012787350453436375,
      "learning_rate": 8.962540239976588e-06,
      "loss": 0.0002,
      "step": 5609
    },
    {
      "epoch": 1.6417910447761193,
      "grad_norm": 0.0012421078281477094,
      "learning_rate": 8.955223880597016e-06,
      "loss": 0.0,
      "step": 5610
    },
    {
      "epoch": 1.6420836991513024,
      "grad_norm": 0.04169453680515289,
      "learning_rate": 8.947907521217442e-06,
      "loss": 0.0005,
      "step": 5611
    },
    {
      "epoch": 1.6423763535264853,
      "grad_norm": 0.21967889368534088,
      "learning_rate": 8.94059116183787e-06,
      "loss": 0.0008,
      "step": 5612
    },
    {
      "epoch": 1.6426690079016681,
      "grad_norm": 0.00621822802349925,
      "learning_rate": 8.933274802458296e-06,
      "loss": 0.0001,
      "step": 5613
    },
    {
      "epoch": 1.642961662276851,
      "grad_norm": 0.0066894544288516045,
      "learning_rate": 8.925958443078724e-06,
      "loss": 0.0001,
      "step": 5614
    },
    {
      "epoch": 1.643254316652034,
      "grad_norm": 0.0031689521856606007,
      "learning_rate": 8.91864208369915e-06,
      "loss": 0.0,
      "step": 5615
    },
    {
      "epoch": 1.6435469710272168,
      "grad_norm": 0.0013706308091059327,
      "learning_rate": 8.911325724319578e-06,
      "loss": 0.0,
      "step": 5616
    },
    {
      "epoch": 1.6438396254024,
      "grad_norm": 0.0011935265501961112,
      "learning_rate": 8.904009364940006e-06,
      "loss": 0.0,
      "step": 5617
    },
    {
      "epoch": 1.6441322797775828,
      "grad_norm": 0.0029069141019135714,
      "learning_rate": 8.896693005560432e-06,
      "loss": 0.0,
      "step": 5618
    },
    {
      "epoch": 1.6444249341527657,
      "grad_norm": 0.0016296259127557278,
      "learning_rate": 8.88937664618086e-06,
      "loss": 0.0,
      "step": 5619
    },
    {
      "epoch": 1.6447175885279486,
      "grad_norm": 0.0017259359592571855,
      "learning_rate": 8.882060286801288e-06,
      "loss": 0.0,
      "step": 5620
    },
    {
      "epoch": 1.6450102429031315,
      "grad_norm": 0.001674041268415749,
      "learning_rate": 8.874743927421715e-06,
      "loss": 0.0,
      "step": 5621
    },
    {
      "epoch": 1.6453028972783144,
      "grad_norm": 0.004651952069252729,
      "learning_rate": 8.867427568042142e-06,
      "loss": 0.0001,
      "step": 5622
    },
    {
      "epoch": 1.6455955516534972,
      "grad_norm": 0.0013309124624356627,
      "learning_rate": 8.860111208662569e-06,
      "loss": 0.0,
      "step": 5623
    },
    {
      "epoch": 1.6458882060286801,
      "grad_norm": 0.0045665837824344635,
      "learning_rate": 8.852794849282997e-06,
      "loss": 0.0001,
      "step": 5624
    },
    {
      "epoch": 1.646180860403863,
      "grad_norm": 0.0037553836591541767,
      "learning_rate": 8.845478489903425e-06,
      "loss": 0.0,
      "step": 5625
    },
    {
      "epoch": 1.646473514779046,
      "grad_norm": 0.011135270819067955,
      "learning_rate": 8.838162130523851e-06,
      "loss": 0.0001,
      "step": 5626
    },
    {
      "epoch": 1.6467661691542288,
      "grad_norm": 0.010377340018749237,
      "learning_rate": 8.830845771144279e-06,
      "loss": 0.0001,
      "step": 5627
    },
    {
      "epoch": 1.6470588235294117,
      "grad_norm": 0.002636183286085725,
      "learning_rate": 8.823529411764707e-06,
      "loss": 0.0001,
      "step": 5628
    },
    {
      "epoch": 1.6473514779045946,
      "grad_norm": 13.157976150512695,
      "learning_rate": 8.816213052385133e-06,
      "loss": 0.069,
      "step": 5629
    },
    {
      "epoch": 1.6476441322797775,
      "grad_norm": 9.887460708618164,
      "learning_rate": 8.808896693005561e-06,
      "loss": 0.0368,
      "step": 5630
    },
    {
      "epoch": 1.6479367866549604,
      "grad_norm": 0.010155647993087769,
      "learning_rate": 8.801580333625987e-06,
      "loss": 0.0001,
      "step": 5631
    },
    {
      "epoch": 1.6482294410301432,
      "grad_norm": 0.0005502697313204408,
      "learning_rate": 8.794263974246415e-06,
      "loss": 0.0,
      "step": 5632
    },
    {
      "epoch": 1.6485220954053263,
      "grad_norm": 0.0068548452109098434,
      "learning_rate": 8.786947614866843e-06,
      "loss": 0.0001,
      "step": 5633
    },
    {
      "epoch": 1.6488147497805092,
      "grad_norm": 0.0030745361000299454,
      "learning_rate": 8.77963125548727e-06,
      "loss": 0.0,
      "step": 5634
    },
    {
      "epoch": 1.6491074041556921,
      "grad_norm": 0.00037383261951617897,
      "learning_rate": 8.772314896107697e-06,
      "loss": 0.0,
      "step": 5635
    },
    {
      "epoch": 1.649400058530875,
      "grad_norm": 0.0018102648900821805,
      "learning_rate": 8.764998536728125e-06,
      "loss": 0.0,
      "step": 5636
    },
    {
      "epoch": 1.649692712906058,
      "grad_norm": 0.0008046173606999218,
      "learning_rate": 8.757682177348552e-06,
      "loss": 0.0,
      "step": 5637
    },
    {
      "epoch": 1.649985367281241,
      "grad_norm": 0.0004981788224540651,
      "learning_rate": 8.75036581796898e-06,
      "loss": 0.0,
      "step": 5638
    },
    {
      "epoch": 1.650278021656424,
      "grad_norm": 0.0017273883568122983,
      "learning_rate": 8.743049458589406e-06,
      "loss": 0.0,
      "step": 5639
    },
    {
      "epoch": 1.6505706760316068,
      "grad_norm": 0.0026937955990433693,
      "learning_rate": 8.735733099209834e-06,
      "loss": 0.0,
      "step": 5640
    },
    {
      "epoch": 1.6508633304067897,
      "grad_norm": 0.0070621841587126255,
      "learning_rate": 8.728416739830262e-06,
      "loss": 0.0001,
      "step": 5641
    },
    {
      "epoch": 1.6511559847819726,
      "grad_norm": 0.0021231193095445633,
      "learning_rate": 8.721100380450688e-06,
      "loss": 0.0,
      "step": 5642
    },
    {
      "epoch": 1.6514486391571555,
      "grad_norm": 0.0005490577896125615,
      "learning_rate": 8.713784021071116e-06,
      "loss": 0.0,
      "step": 5643
    },
    {
      "epoch": 1.6517412935323383,
      "grad_norm": 0.00028466907679103315,
      "learning_rate": 8.706467661691544e-06,
      "loss": 0.0,
      "step": 5644
    },
    {
      "epoch": 1.6520339479075212,
      "grad_norm": 0.0001460868224967271,
      "learning_rate": 8.69915130231197e-06,
      "loss": 0.0,
      "step": 5645
    },
    {
      "epoch": 1.6523266022827041,
      "grad_norm": 0.0008332178695127368,
      "learning_rate": 8.691834942932398e-06,
      "loss": 0.0,
      "step": 5646
    },
    {
      "epoch": 1.652619256657887,
      "grad_norm": 0.001206650398671627,
      "learning_rate": 8.684518583552824e-06,
      "loss": 0.0,
      "step": 5647
    },
    {
      "epoch": 1.65291191103307,
      "grad_norm": 0.002128488617017865,
      "learning_rate": 8.677202224173252e-06,
      "loss": 0.0,
      "step": 5648
    },
    {
      "epoch": 1.6532045654082528,
      "grad_norm": 0.0008632836979813874,
      "learning_rate": 8.66988586479368e-06,
      "loss": 0.0,
      "step": 5649
    },
    {
      "epoch": 1.6534972197834357,
      "grad_norm": 0.000147968516102992,
      "learning_rate": 8.662569505414106e-06,
      "loss": 0.0,
      "step": 5650
    },
    {
      "epoch": 1.6537898741586186,
      "grad_norm": 22.177335739135742,
      "learning_rate": 8.655253146034534e-06,
      "loss": 0.0928,
      "step": 5651
    },
    {
      "epoch": 1.6540825285338014,
      "grad_norm": 0.0005284177605062723,
      "learning_rate": 8.647936786654962e-06,
      "loss": 0.0,
      "step": 5652
    },
    {
      "epoch": 1.6543751829089843,
      "grad_norm": 0.0003988874377682805,
      "learning_rate": 8.640620427275389e-06,
      "loss": 0.0,
      "step": 5653
    },
    {
      "epoch": 1.6546678372841674,
      "grad_norm": 0.001079960959032178,
      "learning_rate": 8.633304067895817e-06,
      "loss": 0.0,
      "step": 5654
    },
    {
      "epoch": 1.6549604916593503,
      "grad_norm": 0.0009095671703107655,
      "learning_rate": 8.625987708516243e-06,
      "loss": 0.0,
      "step": 5655
    },
    {
      "epoch": 1.6552531460345332,
      "grad_norm": 0.00140785810071975,
      "learning_rate": 8.61867134913667e-06,
      "loss": 0.0,
      "step": 5656
    },
    {
      "epoch": 1.655545800409716,
      "grad_norm": 0.002098365221172571,
      "learning_rate": 8.611354989757097e-06,
      "loss": 0.0,
      "step": 5657
    },
    {
      "epoch": 1.655838454784899,
      "grad_norm": 0.0018707603449001908,
      "learning_rate": 8.604038630377525e-06,
      "loss": 0.0,
      "step": 5658
    },
    {
      "epoch": 1.656131109160082,
      "grad_norm": 0.002513097133487463,
      "learning_rate": 8.596722270997951e-06,
      "loss": 0.0,
      "step": 5659
    },
    {
      "epoch": 1.656423763535265,
      "grad_norm": 0.00026852835435420275,
      "learning_rate": 8.58940591161838e-06,
      "loss": 0.0,
      "step": 5660
    },
    {
      "epoch": 1.6567164179104479,
      "grad_norm": 0.00040217710193246603,
      "learning_rate": 8.582089552238805e-06,
      "loss": 0.0,
      "step": 5661
    },
    {
      "epoch": 1.6570090722856308,
      "grad_norm": 0.0013781053712591529,
      "learning_rate": 8.574773192859233e-06,
      "loss": 0.0,
      "step": 5662
    },
    {
      "epoch": 1.6573017266608137,
      "grad_norm": 10.875997543334961,
      "learning_rate": 8.56745683347966e-06,
      "loss": 0.0121,
      "step": 5663
    },
    {
      "epoch": 1.6575943810359965,
      "grad_norm": 0.0004421852936502546,
      "learning_rate": 8.560140474100088e-06,
      "loss": 0.0,
      "step": 5664
    },
    {
      "epoch": 1.6578870354111794,
      "grad_norm": 0.003944370895624161,
      "learning_rate": 8.552824114720516e-06,
      "loss": 0.0001,
      "step": 5665
    },
    {
      "epoch": 1.6581796897863623,
      "grad_norm": 0.0005194034310989082,
      "learning_rate": 8.545507755340942e-06,
      "loss": 0.0,
      "step": 5666
    },
    {
      "epoch": 1.6584723441615452,
      "grad_norm": 0.000154358524014242,
      "learning_rate": 8.53819139596137e-06,
      "loss": 0.0,
      "step": 5667
    },
    {
      "epoch": 1.658764998536728,
      "grad_norm": 2.193861484527588,
      "learning_rate": 8.530875036581798e-06,
      "loss": 0.0058,
      "step": 5668
    },
    {
      "epoch": 1.659057652911911,
      "grad_norm": 0.00018022813310381025,
      "learning_rate": 8.523558677202224e-06,
      "loss": 0.0,
      "step": 5669
    },
    {
      "epoch": 1.6593503072870939,
      "grad_norm": 0.0004651829949580133,
      "learning_rate": 8.516242317822652e-06,
      "loss": 0.0,
      "step": 5670
    },
    {
      "epoch": 1.6596429616622768,
      "grad_norm": 0.0011934270150959492,
      "learning_rate": 8.508925958443078e-06,
      "loss": 0.0,
      "step": 5671
    },
    {
      "epoch": 1.6599356160374596,
      "grad_norm": 0.0004671476490329951,
      "learning_rate": 8.501609599063506e-06,
      "loss": 0.0,
      "step": 5672
    },
    {
      "epoch": 1.6602282704126425,
      "grad_norm": 0.0004122898099012673,
      "learning_rate": 8.494293239683934e-06,
      "loss": 0.0,
      "step": 5673
    },
    {
      "epoch": 1.6605209247878254,
      "grad_norm": 0.0004934952594339848,
      "learning_rate": 8.48697688030436e-06,
      "loss": 0.0,
      "step": 5674
    },
    {
      "epoch": 1.6608135791630085,
      "grad_norm": 0.0037849058862775564,
      "learning_rate": 8.479660520924788e-06,
      "loss": 0.0001,
      "step": 5675
    },
    {
      "epoch": 1.6611062335381914,
      "grad_norm": 0.0011096809757873416,
      "learning_rate": 8.472344161545215e-06,
      "loss": 0.0,
      "step": 5676
    },
    {
      "epoch": 1.6613988879133743,
      "grad_norm": 0.0008264588541351259,
      "learning_rate": 8.465027802165642e-06,
      "loss": 0.0,
      "step": 5677
    },
    {
      "epoch": 1.6616915422885572,
      "grad_norm": 0.0005098911351524293,
      "learning_rate": 8.45771144278607e-06,
      "loss": 0.0,
      "step": 5678
    },
    {
      "epoch": 1.66198419666374,
      "grad_norm": 0.0015530316159129143,
      "learning_rate": 8.450395083406497e-06,
      "loss": 0.0,
      "step": 5679
    },
    {
      "epoch": 1.6622768510389232,
      "grad_norm": 0.001707770861685276,
      "learning_rate": 8.443078724026925e-06,
      "loss": 0.0,
      "step": 5680
    },
    {
      "epoch": 1.662569505414106,
      "grad_norm": 0.000391901470720768,
      "learning_rate": 8.435762364647353e-06,
      "loss": 0.0,
      "step": 5681
    },
    {
      "epoch": 1.662862159789289,
      "grad_norm": 0.00044289889046922326,
      "learning_rate": 8.428446005267779e-06,
      "loss": 0.0,
      "step": 5682
    },
    {
      "epoch": 1.6631548141644719,
      "grad_norm": 0.0008527369936928153,
      "learning_rate": 8.421129645888207e-06,
      "loss": 0.0,
      "step": 5683
    },
    {
      "epoch": 1.6634474685396547,
      "grad_norm": 0.00022882982739247382,
      "learning_rate": 8.413813286508633e-06,
      "loss": 0.0,
      "step": 5684
    },
    {
      "epoch": 1.6637401229148376,
      "grad_norm": 0.0015376824885606766,
      "learning_rate": 8.406496927129061e-06,
      "loss": 0.0,
      "step": 5685
    },
    {
      "epoch": 1.6640327772900205,
      "grad_norm": 0.006001113913953304,
      "learning_rate": 8.399180567749489e-06,
      "loss": 0.0,
      "step": 5686
    },
    {
      "epoch": 1.6643254316652034,
      "grad_norm": 0.000480707676615566,
      "learning_rate": 8.391864208369915e-06,
      "loss": 0.0,
      "step": 5687
    },
    {
      "epoch": 1.6646180860403863,
      "grad_norm": 0.0010196049697697163,
      "learning_rate": 8.384547848990343e-06,
      "loss": 0.0,
      "step": 5688
    },
    {
      "epoch": 1.6649107404155692,
      "grad_norm": 0.0002625976630952209,
      "learning_rate": 8.377231489610771e-06,
      "loss": 0.0,
      "step": 5689
    },
    {
      "epoch": 1.665203394790752,
      "grad_norm": 0.0020561853889375925,
      "learning_rate": 8.369915130231197e-06,
      "loss": 0.0,
      "step": 5690
    },
    {
      "epoch": 1.665496049165935,
      "grad_norm": 0.004877160768955946,
      "learning_rate": 8.362598770851625e-06,
      "loss": 0.0,
      "step": 5691
    },
    {
      "epoch": 1.6657887035411179,
      "grad_norm": 0.0002567913325037807,
      "learning_rate": 8.355282411472052e-06,
      "loss": 0.0,
      "step": 5692
    },
    {
      "epoch": 1.6660813579163007,
      "grad_norm": 0.0006398832774721086,
      "learning_rate": 8.34796605209248e-06,
      "loss": 0.0,
      "step": 5693
    },
    {
      "epoch": 1.6663740122914836,
      "grad_norm": 0.0006991418777033687,
      "learning_rate": 8.340649692712907e-06,
      "loss": 0.0,
      "step": 5694
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 0.0005723349750041962,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.0,
      "step": 5695
    },
    {
      "epoch": 1.6669593210418496,
      "grad_norm": 0.000251155550358817,
      "learning_rate": 8.326016973953762e-06,
      "loss": 0.0,
      "step": 5696
    },
    {
      "epoch": 1.6672519754170325,
      "grad_norm": 0.007094892673194408,
      "learning_rate": 8.31870061457419e-06,
      "loss": 0.0001,
      "step": 5697
    },
    {
      "epoch": 1.6675446297922154,
      "grad_norm": 0.0020860317163169384,
      "learning_rate": 8.311384255194616e-06,
      "loss": 0.0,
      "step": 5698
    },
    {
      "epoch": 1.6678372841673983,
      "grad_norm": 0.0011908210581168532,
      "learning_rate": 8.304067895815044e-06,
      "loss": 0.0,
      "step": 5699
    },
    {
      "epoch": 1.6681299385425812,
      "grad_norm": 0.0003415268729440868,
      "learning_rate": 8.29675153643547e-06,
      "loss": 0.0,
      "step": 5700
    },
    {
      "epoch": 1.668422592917764,
      "grad_norm": 0.0010934275342151523,
      "learning_rate": 8.289435177055898e-06,
      "loss": 0.0,
      "step": 5701
    },
    {
      "epoch": 1.6687152472929472,
      "grad_norm": 0.00020715589926112443,
      "learning_rate": 8.282118817676324e-06,
      "loss": 0.0,
      "step": 5702
    },
    {
      "epoch": 1.66900790166813,
      "grad_norm": 0.0003347251040395349,
      "learning_rate": 8.274802458296752e-06,
      "loss": 0.0,
      "step": 5703
    },
    {
      "epoch": 1.669300556043313,
      "grad_norm": 0.0010050522396340966,
      "learning_rate": 8.267486098917179e-06,
      "loss": 0.0,
      "step": 5704
    },
    {
      "epoch": 1.6695932104184958,
      "grad_norm": 0.0010485193924978375,
      "learning_rate": 8.260169739537606e-06,
      "loss": 0.0,
      "step": 5705
    },
    {
      "epoch": 1.6698858647936787,
      "grad_norm": 0.0003816711832769215,
      "learning_rate": 8.252853380158033e-06,
      "loss": 0.0,
      "step": 5706
    },
    {
      "epoch": 1.6701785191688616,
      "grad_norm": 0.00030886335298419,
      "learning_rate": 8.24553702077846e-06,
      "loss": 0.0,
      "step": 5707
    },
    {
      "epoch": 1.6704711735440445,
      "grad_norm": 0.0003605361853260547,
      "learning_rate": 8.238220661398887e-06,
      "loss": 0.0,
      "step": 5708
    },
    {
      "epoch": 1.6707638279192274,
      "grad_norm": 0.000228889737627469,
      "learning_rate": 8.230904302019315e-06,
      "loss": 0.0,
      "step": 5709
    },
    {
      "epoch": 1.6710564822944103,
      "grad_norm": 0.0003481085877865553,
      "learning_rate": 8.223587942639743e-06,
      "loss": 0.0,
      "step": 5710
    },
    {
      "epoch": 1.6713491366695932,
      "grad_norm": 0.0017270563403144479,
      "learning_rate": 8.216271583260169e-06,
      "loss": 0.0,
      "step": 5711
    },
    {
      "epoch": 1.671641791044776,
      "grad_norm": 0.0026479167863726616,
      "learning_rate": 8.208955223880597e-06,
      "loss": 0.0,
      "step": 5712
    },
    {
      "epoch": 1.671934445419959,
      "grad_norm": 0.00040890229865908623,
      "learning_rate": 8.201638864501025e-06,
      "loss": 0.0,
      "step": 5713
    },
    {
      "epoch": 1.6722270997951418,
      "grad_norm": 0.0006078731385059655,
      "learning_rate": 8.194322505121451e-06,
      "loss": 0.0,
      "step": 5714
    },
    {
      "epoch": 1.6725197541703247,
      "grad_norm": 0.0004306174232624471,
      "learning_rate": 8.18700614574188e-06,
      "loss": 0.0,
      "step": 5715
    },
    {
      "epoch": 1.6728124085455076,
      "grad_norm": 0.0004859520122408867,
      "learning_rate": 8.179689786362305e-06,
      "loss": 0.0,
      "step": 5716
    },
    {
      "epoch": 1.6731050629206905,
      "grad_norm": 0.0002467651211190969,
      "learning_rate": 8.172373426982733e-06,
      "loss": 0.0,
      "step": 5717
    },
    {
      "epoch": 1.6733977172958736,
      "grad_norm": 0.00070102111203596,
      "learning_rate": 8.165057067603161e-06,
      "loss": 0.0,
      "step": 5718
    },
    {
      "epoch": 1.6736903716710565,
      "grad_norm": 0.0001780585735104978,
      "learning_rate": 8.157740708223588e-06,
      "loss": 0.0,
      "step": 5719
    },
    {
      "epoch": 1.6739830260462394,
      "grad_norm": 3.630791425704956,
      "learning_rate": 8.150424348844016e-06,
      "loss": 0.1884,
      "step": 5720
    },
    {
      "epoch": 1.6742756804214223,
      "grad_norm": 0.0020441662054508924,
      "learning_rate": 8.143107989464443e-06,
      "loss": 0.0,
      "step": 5721
    },
    {
      "epoch": 1.6745683347966052,
      "grad_norm": 0.001493821619078517,
      "learning_rate": 8.13579163008487e-06,
      "loss": 0.0,
      "step": 5722
    },
    {
      "epoch": 1.6748609891717883,
      "grad_norm": 0.0010016279993578792,
      "learning_rate": 8.128475270705298e-06,
      "loss": 0.0,
      "step": 5723
    },
    {
      "epoch": 1.6751536435469712,
      "grad_norm": 0.00042762173688970506,
      "learning_rate": 8.121158911325724e-06,
      "loss": 0.0,
      "step": 5724
    },
    {
      "epoch": 1.675446297922154,
      "grad_norm": 0.00023038113431539387,
      "learning_rate": 8.113842551946152e-06,
      "loss": 0.0,
      "step": 5725
    },
    {
      "epoch": 1.675738952297337,
      "grad_norm": 0.0010480210185050964,
      "learning_rate": 8.10652619256658e-06,
      "loss": 0.0,
      "step": 5726
    },
    {
      "epoch": 1.6760316066725198,
      "grad_norm": 0.000737055903300643,
      "learning_rate": 8.099209833187006e-06,
      "loss": 0.0,
      "step": 5727
    },
    {
      "epoch": 1.6763242610477027,
      "grad_norm": 0.0007961266674101353,
      "learning_rate": 8.091893473807434e-06,
      "loss": 0.0,
      "step": 5728
    },
    {
      "epoch": 1.6766169154228856,
      "grad_norm": 0.0014897778164595366,
      "learning_rate": 8.084577114427862e-06,
      "loss": 0.0,
      "step": 5729
    },
    {
      "epoch": 1.6769095697980685,
      "grad_norm": 0.003211492905393243,
      "learning_rate": 8.077260755048288e-06,
      "loss": 0.0001,
      "step": 5730
    },
    {
      "epoch": 1.6772022241732514,
      "grad_norm": 0.0006992241251282394,
      "learning_rate": 8.069944395668716e-06,
      "loss": 0.0,
      "step": 5731
    },
    {
      "epoch": 1.6774948785484343,
      "grad_norm": 0.0003750199975911528,
      "learning_rate": 8.062628036289142e-06,
      "loss": 0.0,
      "step": 5732
    },
    {
      "epoch": 1.6777875329236172,
      "grad_norm": 0.0006417161202989519,
      "learning_rate": 8.05531167690957e-06,
      "loss": 0.0,
      "step": 5733
    },
    {
      "epoch": 1.6780801872988,
      "grad_norm": 0.0015260449144989252,
      "learning_rate": 8.047995317529998e-06,
      "loss": 0.0,
      "step": 5734
    },
    {
      "epoch": 1.678372841673983,
      "grad_norm": 0.0005805394030176103,
      "learning_rate": 8.040678958150425e-06,
      "loss": 0.0,
      "step": 5735
    },
    {
      "epoch": 1.6786654960491658,
      "grad_norm": 0.001112207886762917,
      "learning_rate": 8.033362598770853e-06,
      "loss": 0.0,
      "step": 5736
    },
    {
      "epoch": 1.6789581504243487,
      "grad_norm": 0.002090575871989131,
      "learning_rate": 8.02604623939128e-06,
      "loss": 0.0,
      "step": 5737
    },
    {
      "epoch": 1.6792508047995316,
      "grad_norm": 0.0011604102328419685,
      "learning_rate": 8.018729880011707e-06,
      "loss": 0.0,
      "step": 5738
    },
    {
      "epoch": 1.6795434591747147,
      "grad_norm": 0.0008536601671949029,
      "learning_rate": 8.011413520632135e-06,
      "loss": 0.0,
      "step": 5739
    },
    {
      "epoch": 1.6798361135498976,
      "grad_norm": 0.004838922992348671,
      "learning_rate": 8.004097161252561e-06,
      "loss": 0.0001,
      "step": 5740
    },
    {
      "epoch": 1.6801287679250805,
      "grad_norm": 0.0020388218108564615,
      "learning_rate": 7.996780801872989e-06,
      "loss": 0.0,
      "step": 5741
    },
    {
      "epoch": 1.6804214223002634,
      "grad_norm": 0.015307688154280186,
      "learning_rate": 7.989464442493417e-06,
      "loss": 0.0002,
      "step": 5742
    },
    {
      "epoch": 1.6807140766754463,
      "grad_norm": 0.00044861010974273086,
      "learning_rate": 7.982148083113843e-06,
      "loss": 0.0,
      "step": 5743
    },
    {
      "epoch": 1.6810067310506294,
      "grad_norm": 0.006845089606940746,
      "learning_rate": 7.974831723734271e-06,
      "loss": 0.0001,
      "step": 5744
    },
    {
      "epoch": 1.6812993854258123,
      "grad_norm": 0.004504820331931114,
      "learning_rate": 7.967515364354697e-06,
      "loss": 0.0001,
      "step": 5745
    },
    {
      "epoch": 1.6815920398009951,
      "grad_norm": 0.001339981216005981,
      "learning_rate": 7.960199004975125e-06,
      "loss": 0.0,
      "step": 5746
    },
    {
      "epoch": 1.681884694176178,
      "grad_norm": 0.002657198579981923,
      "learning_rate": 7.952882645595552e-06,
      "loss": 0.0001,
      "step": 5747
    },
    {
      "epoch": 1.682177348551361,
      "grad_norm": 0.0030452387873083353,
      "learning_rate": 7.94556628621598e-06,
      "loss": 0.0001,
      "step": 5748
    },
    {
      "epoch": 1.6824700029265438,
      "grad_norm": 0.002118358388543129,
      "learning_rate": 7.938249926836406e-06,
      "loss": 0.0,
      "step": 5749
    },
    {
      "epoch": 1.6827626573017267,
      "grad_norm": 0.006794402375817299,
      "learning_rate": 7.930933567456834e-06,
      "loss": 0.0001,
      "step": 5750
    },
    {
      "epoch": 1.6830553116769096,
      "grad_norm": 0.0026812430005520582,
      "learning_rate": 7.92361720807726e-06,
      "loss": 0.0001,
      "step": 5751
    },
    {
      "epoch": 1.6833479660520925,
      "grad_norm": 0.004444151651114225,
      "learning_rate": 7.916300848697688e-06,
      "loss": 0.0001,
      "step": 5752
    },
    {
      "epoch": 1.6836406204272754,
      "grad_norm": 0.003096078522503376,
      "learning_rate": 7.908984489318114e-06,
      "loss": 0.0001,
      "step": 5753
    },
    {
      "epoch": 1.6839332748024582,
      "grad_norm": 0.0019110736902803183,
      "learning_rate": 7.901668129938542e-06,
      "loss": 0.0,
      "step": 5754
    },
    {
      "epoch": 1.6842259291776411,
      "grad_norm": 0.005232947412878275,
      "learning_rate": 7.89435177055897e-06,
      "loss": 0.0001,
      "step": 5755
    },
    {
      "epoch": 1.684518583552824,
      "grad_norm": 0.003011050634086132,
      "learning_rate": 7.887035411179396e-06,
      "loss": 0.0001,
      "step": 5756
    },
    {
      "epoch": 1.684811237928007,
      "grad_norm": 0.0004928670823574066,
      "learning_rate": 7.879719051799824e-06,
      "loss": 0.0,
      "step": 5757
    },
    {
      "epoch": 1.6851038923031898,
      "grad_norm": 0.006043503992259502,
      "learning_rate": 7.872402692420252e-06,
      "loss": 0.0001,
      "step": 5758
    },
    {
      "epoch": 1.6853965466783727,
      "grad_norm": 0.0013670605840161443,
      "learning_rate": 7.865086333040679e-06,
      "loss": 0.0,
      "step": 5759
    },
    {
      "epoch": 1.6856892010535558,
      "grad_norm": 0.0008556185639463365,
      "learning_rate": 7.857769973661106e-06,
      "loss": 0.0,
      "step": 5760
    },
    {
      "epoch": 1.6859818554287387,
      "grad_norm": 0.0012822890421375632,
      "learning_rate": 7.850453614281533e-06,
      "loss": 0.0,
      "step": 5761
    },
    {
      "epoch": 1.6862745098039216,
      "grad_norm": 0.0002606217167340219,
      "learning_rate": 7.84313725490196e-06,
      "loss": 0.0,
      "step": 5762
    },
    {
      "epoch": 1.6865671641791045,
      "grad_norm": 0.0017107477178797126,
      "learning_rate": 7.835820895522389e-06,
      "loss": 0.0,
      "step": 5763
    },
    {
      "epoch": 1.6868598185542873,
      "grad_norm": 0.0019767291378229856,
      "learning_rate": 7.828504536142815e-06,
      "loss": 0.0,
      "step": 5764
    },
    {
      "epoch": 1.6871524729294705,
      "grad_norm": 10.906447410583496,
      "learning_rate": 7.821188176763243e-06,
      "loss": 0.0728,
      "step": 5765
    },
    {
      "epoch": 1.6874451273046533,
      "grad_norm": 0.0011748663382604718,
      "learning_rate": 7.81387181738367e-06,
      "loss": 0.0,
      "step": 5766
    },
    {
      "epoch": 1.6877377816798362,
      "grad_norm": 0.08097922801971436,
      "learning_rate": 7.806555458004097e-06,
      "loss": 0.0006,
      "step": 5767
    },
    {
      "epoch": 1.6880304360550191,
      "grad_norm": 0.00511398958042264,
      "learning_rate": 7.799239098624525e-06,
      "loss": 0.0001,
      "step": 5768
    },
    {
      "epoch": 1.688323090430202,
      "grad_norm": 0.006898779422044754,
      "learning_rate": 7.791922739244951e-06,
      "loss": 0.0001,
      "step": 5769
    },
    {
      "epoch": 1.688615744805385,
      "grad_norm": 0.0006506079225800931,
      "learning_rate": 7.78460637986538e-06,
      "loss": 0.0,
      "step": 5770
    },
    {
      "epoch": 1.6889083991805678,
      "grad_norm": 0.005674232728779316,
      "learning_rate": 7.777290020485807e-06,
      "loss": 0.0001,
      "step": 5771
    },
    {
      "epoch": 1.6892010535557507,
      "grad_norm": 0.0003180634812451899,
      "learning_rate": 7.769973661106233e-06,
      "loss": 0.0,
      "step": 5772
    },
    {
      "epoch": 1.6894937079309336,
      "grad_norm": 0.023299410939216614,
      "learning_rate": 7.762657301726661e-06,
      "loss": 0.0002,
      "step": 5773
    },
    {
      "epoch": 1.6897863623061165,
      "grad_norm": 0.0024151585530489683,
      "learning_rate": 7.75534094234709e-06,
      "loss": 0.0,
      "step": 5774
    },
    {
      "epoch": 1.6900790166812993,
      "grad_norm": 0.0006375240045599639,
      "learning_rate": 7.748024582967516e-06,
      "loss": 0.0,
      "step": 5775
    },
    {
      "epoch": 1.6903716710564822,
      "grad_norm": 0.005888013169169426,
      "learning_rate": 7.740708223587943e-06,
      "loss": 0.0001,
      "step": 5776
    },
    {
      "epoch": 1.6906643254316651,
      "grad_norm": 0.002546227304264903,
      "learning_rate": 7.73339186420837e-06,
      "loss": 0.0,
      "step": 5777
    },
    {
      "epoch": 1.690956979806848,
      "grad_norm": 0.0007193618221208453,
      "learning_rate": 7.726075504828798e-06,
      "loss": 0.0,
      "step": 5778
    },
    {
      "epoch": 1.691249634182031,
      "grad_norm": 0.006669328082352877,
      "learning_rate": 7.718759145449226e-06,
      "loss": 0.0001,
      "step": 5779
    },
    {
      "epoch": 1.6915422885572138,
      "grad_norm": 0.0009613312431611121,
      "learning_rate": 7.711442786069652e-06,
      "loss": 0.0,
      "step": 5780
    },
    {
      "epoch": 1.691834942932397,
      "grad_norm": 0.003605912672355771,
      "learning_rate": 7.70412642669008e-06,
      "loss": 0.0001,
      "step": 5781
    },
    {
      "epoch": 1.6921275973075798,
      "grad_norm": 0.0015000062994658947,
      "learning_rate": 7.696810067310508e-06,
      "loss": 0.0,
      "step": 5782
    },
    {
      "epoch": 1.6924202516827627,
      "grad_norm": 0.0006242544623091817,
      "learning_rate": 7.689493707930934e-06,
      "loss": 0.0,
      "step": 5783
    },
    {
      "epoch": 1.6927129060579456,
      "grad_norm": 0.007943959906697273,
      "learning_rate": 7.682177348551362e-06,
      "loss": 0.0001,
      "step": 5784
    },
    {
      "epoch": 1.6930055604331284,
      "grad_norm": 9.961484465748072e-05,
      "learning_rate": 7.674860989171788e-06,
      "loss": 0.0,
      "step": 5785
    },
    {
      "epoch": 1.6932982148083113,
      "grad_norm": 0.0051424140110611916,
      "learning_rate": 7.667544629792216e-06,
      "loss": 0.0,
      "step": 5786
    },
    {
      "epoch": 1.6935908691834944,
      "grad_norm": 0.004370726179331541,
      "learning_rate": 7.660228270412644e-06,
      "loss": 0.0001,
      "step": 5787
    },
    {
      "epoch": 1.6938835235586773,
      "grad_norm": 0.001413586433045566,
      "learning_rate": 7.65291191103307e-06,
      "loss": 0.0,
      "step": 5788
    },
    {
      "epoch": 1.6941761779338602,
      "grad_norm": 0.001110206125304103,
      "learning_rate": 7.645595551653498e-06,
      "loss": 0.0,
      "step": 5789
    },
    {
      "epoch": 1.694468832309043,
      "grad_norm": 0.0006187248509377241,
      "learning_rate": 7.638279192273926e-06,
      "loss": 0.0,
      "step": 5790
    },
    {
      "epoch": 1.694761486684226,
      "grad_norm": 0.001334872213192284,
      "learning_rate": 7.630962832894353e-06,
      "loss": 0.0,
      "step": 5791
    },
    {
      "epoch": 1.6950541410594089,
      "grad_norm": 0.0008332266588695347,
      "learning_rate": 7.62364647351478e-06,
      "loss": 0.0,
      "step": 5792
    },
    {
      "epoch": 1.6953467954345918,
      "grad_norm": 0.0010400940664112568,
      "learning_rate": 7.616330114135206e-06,
      "loss": 0.0,
      "step": 5793
    },
    {
      "epoch": 1.6956394498097747,
      "grad_norm": 0.0016752583906054497,
      "learning_rate": 7.609013754755634e-06,
      "loss": 0.0,
      "step": 5794
    },
    {
      "epoch": 1.6959321041849575,
      "grad_norm": 0.0008904021815396845,
      "learning_rate": 7.601697395376062e-06,
      "loss": 0.0,
      "step": 5795
    },
    {
      "epoch": 1.6962247585601404,
      "grad_norm": 0.005862629506736994,
      "learning_rate": 7.594381035996488e-06,
      "loss": 0.0001,
      "step": 5796
    },
    {
      "epoch": 1.6965174129353233,
      "grad_norm": 0.0006948685040697455,
      "learning_rate": 7.587064676616916e-06,
      "loss": 0.0,
      "step": 5797
    },
    {
      "epoch": 1.6968100673105062,
      "grad_norm": 1.873349666595459,
      "learning_rate": 7.579748317237344e-06,
      "loss": 0.0037,
      "step": 5798
    },
    {
      "epoch": 1.697102721685689,
      "grad_norm": 0.00048663359484635293,
      "learning_rate": 7.57243195785777e-06,
      "loss": 0.0,
      "step": 5799
    },
    {
      "epoch": 1.697395376060872,
      "grad_norm": 0.00143390370067209,
      "learning_rate": 7.565115598478198e-06,
      "loss": 0.0,
      "step": 5800
    },
    {
      "epoch": 1.6976880304360549,
      "grad_norm": 0.0016824619378894567,
      "learning_rate": 7.5577992390986245e-06,
      "loss": 0.0,
      "step": 5801
    },
    {
      "epoch": 1.6979806848112378,
      "grad_norm": 0.0010202049743384123,
      "learning_rate": 7.550482879719052e-06,
      "loss": 0.0,
      "step": 5802
    },
    {
      "epoch": 1.6982733391864209,
      "grad_norm": 0.001123300869949162,
      "learning_rate": 7.54316652033948e-06,
      "loss": 0.0,
      "step": 5803
    },
    {
      "epoch": 1.6985659935616038,
      "grad_norm": 0.0006333779892884195,
      "learning_rate": 7.535850160959907e-06,
      "loss": 0.0,
      "step": 5804
    },
    {
      "epoch": 1.6988586479367866,
      "grad_norm": 0.0012238698545843363,
      "learning_rate": 7.5285338015803346e-06,
      "loss": 0.0,
      "step": 5805
    },
    {
      "epoch": 1.6991513023119695,
      "grad_norm": 0.00036312537849880755,
      "learning_rate": 7.521217442200761e-06,
      "loss": 0.0,
      "step": 5806
    },
    {
      "epoch": 1.6994439566871524,
      "grad_norm": 0.0009994838619604707,
      "learning_rate": 7.513901082821189e-06,
      "loss": 0.0,
      "step": 5807
    },
    {
      "epoch": 1.6997366110623355,
      "grad_norm": 0.0009392634965479374,
      "learning_rate": 7.506584723441616e-06,
      "loss": 0.0,
      "step": 5808
    },
    {
      "epoch": 1.7000292654375184,
      "grad_norm": 0.0006655640318058431,
      "learning_rate": 7.499268364062043e-06,
      "loss": 0.0,
      "step": 5809
    },
    {
      "epoch": 1.7003219198127013,
      "grad_norm": 0.0019335742108523846,
      "learning_rate": 7.49195200468247e-06,
      "loss": 0.0,
      "step": 5810
    },
    {
      "epoch": 1.7006145741878842,
      "grad_norm": 0.004660231526941061,
      "learning_rate": 7.484635645302898e-06,
      "loss": 0.0001,
      "step": 5811
    },
    {
      "epoch": 1.700907228563067,
      "grad_norm": 0.001245697378180921,
      "learning_rate": 7.477319285923324e-06,
      "loss": 0.0,
      "step": 5812
    },
    {
      "epoch": 1.70119988293825,
      "grad_norm": 0.00019114658061880618,
      "learning_rate": 7.470002926543752e-06,
      "loss": 0.0,
      "step": 5813
    },
    {
      "epoch": 1.7014925373134329,
      "grad_norm": 0.0001273701636819169,
      "learning_rate": 7.4626865671641785e-06,
      "loss": 0.0,
      "step": 5814
    },
    {
      "epoch": 1.7017851916886158,
      "grad_norm": 0.0007166353752836585,
      "learning_rate": 7.4553702077846065e-06,
      "loss": 0.0,
      "step": 5815
    },
    {
      "epoch": 1.7020778460637986,
      "grad_norm": 0.022403433918952942,
      "learning_rate": 7.448053848405034e-06,
      "loss": 0.0002,
      "step": 5816
    },
    {
      "epoch": 1.7023705004389815,
      "grad_norm": 0.0005216248100623488,
      "learning_rate": 7.440737489025461e-06,
      "loss": 0.0,
      "step": 5817
    },
    {
      "epoch": 1.7026631548141644,
      "grad_norm": 0.00046261277748271823,
      "learning_rate": 7.433421129645889e-06,
      "loss": 0.0,
      "step": 5818
    },
    {
      "epoch": 1.7029558091893473,
      "grad_norm": 0.0011683704797178507,
      "learning_rate": 7.4261047702663166e-06,
      "loss": 0.0,
      "step": 5819
    },
    {
      "epoch": 1.7032484635645302,
      "grad_norm": 0.003403689945116639,
      "learning_rate": 7.418788410886743e-06,
      "loss": 0.0001,
      "step": 5820
    },
    {
      "epoch": 1.703541117939713,
      "grad_norm": 0.0008349033887498081,
      "learning_rate": 7.411472051507171e-06,
      "loss": 0.0,
      "step": 5821
    },
    {
      "epoch": 1.703833772314896,
      "grad_norm": 0.0022660107351839542,
      "learning_rate": 7.404155692127597e-06,
      "loss": 0.0,
      "step": 5822
    },
    {
      "epoch": 1.7041264266900789,
      "grad_norm": 0.0004610086034517735,
      "learning_rate": 7.396839332748025e-06,
      "loss": 0.0,
      "step": 5823
    },
    {
      "epoch": 1.704419081065262,
      "grad_norm": 0.0004957149503752589,
      "learning_rate": 7.389522973368453e-06,
      "loss": 0.0,
      "step": 5824
    },
    {
      "epoch": 1.7047117354404449,
      "grad_norm": 0.0004168192099314183,
      "learning_rate": 7.382206613988879e-06,
      "loss": 0.0,
      "step": 5825
    },
    {
      "epoch": 1.7050043898156277,
      "grad_norm": 0.00333635276183486,
      "learning_rate": 7.374890254609307e-06,
      "loss": 0.0,
      "step": 5826
    },
    {
      "epoch": 1.7052970441908106,
      "grad_norm": 0.014504407532513142,
      "learning_rate": 7.367573895229734e-06,
      "loss": 0.0001,
      "step": 5827
    },
    {
      "epoch": 1.7055896985659935,
      "grad_norm": 0.0007202645647339523,
      "learning_rate": 7.360257535850161e-06,
      "loss": 0.0,
      "step": 5828
    },
    {
      "epoch": 1.7058823529411766,
      "grad_norm": 0.00040720164543017745,
      "learning_rate": 7.3529411764705884e-06,
      "loss": 0.0,
      "step": 5829
    },
    {
      "epoch": 1.7061750073163595,
      "grad_norm": 4.333349704742432,
      "learning_rate": 7.3456248170910155e-06,
      "loss": 0.298,
      "step": 5830
    },
    {
      "epoch": 1.7064676616915424,
      "grad_norm": 0.008811557665467262,
      "learning_rate": 7.338308457711443e-06,
      "loss": 0.0001,
      "step": 5831
    },
    {
      "epoch": 1.7067603160667253,
      "grad_norm": 0.0010597541695460677,
      "learning_rate": 7.330992098331871e-06,
      "loss": 0.0,
      "step": 5832
    },
    {
      "epoch": 1.7070529704419082,
      "grad_norm": 0.0005229754606261849,
      "learning_rate": 7.323675738952297e-06,
      "loss": 0.0,
      "step": 5833
    },
    {
      "epoch": 1.707345624817091,
      "grad_norm": 0.0004036907048430294,
      "learning_rate": 7.316359379572725e-06,
      "loss": 0.0,
      "step": 5834
    },
    {
      "epoch": 1.707638279192274,
      "grad_norm": 0.0015681361546739936,
      "learning_rate": 7.309043020193153e-06,
      "loss": 0.0,
      "step": 5835
    },
    {
      "epoch": 1.7079309335674568,
      "grad_norm": 0.0007967501296661794,
      "learning_rate": 7.301726660813579e-06,
      "loss": 0.0,
      "step": 5836
    },
    {
      "epoch": 1.7082235879426397,
      "grad_norm": 0.0037929262034595013,
      "learning_rate": 7.294410301434007e-06,
      "loss": 0.0,
      "step": 5837
    },
    {
      "epoch": 1.7085162423178226,
      "grad_norm": 0.0007488639676012099,
      "learning_rate": 7.287093942054433e-06,
      "loss": 0.0,
      "step": 5838
    },
    {
      "epoch": 1.7088088966930055,
      "grad_norm": 0.0005727543029934168,
      "learning_rate": 7.279777582674861e-06,
      "loss": 0.0,
      "step": 5839
    },
    {
      "epoch": 1.7091015510681884,
      "grad_norm": 0.0005464969435706735,
      "learning_rate": 7.272461223295289e-06,
      "loss": 0.0,
      "step": 5840
    },
    {
      "epoch": 1.7093942054433713,
      "grad_norm": 0.0005111905629746616,
      "learning_rate": 7.265144863915715e-06,
      "loss": 0.0,
      "step": 5841
    },
    {
      "epoch": 1.7096868598185542,
      "grad_norm": 0.005028417333960533,
      "learning_rate": 7.257828504536143e-06,
      "loss": 0.0001,
      "step": 5842
    },
    {
      "epoch": 1.709979514193737,
      "grad_norm": 0.0034411484375596046,
      "learning_rate": 7.250512145156571e-06,
      "loss": 0.0001,
      "step": 5843
    },
    {
      "epoch": 1.71027216856892,
      "grad_norm": 0.027304381132125854,
      "learning_rate": 7.2431957857769975e-06,
      "loss": 0.0001,
      "step": 5844
    },
    {
      "epoch": 1.710564822944103,
      "grad_norm": 0.000758185051381588,
      "learning_rate": 7.2358794263974255e-06,
      "loss": 0.0,
      "step": 5845
    },
    {
      "epoch": 1.710857477319286,
      "grad_norm": 0.0034596100449562073,
      "learning_rate": 7.228563067017852e-06,
      "loss": 0.0001,
      "step": 5846
    },
    {
      "epoch": 1.7111501316944688,
      "grad_norm": 0.0017440527444705367,
      "learning_rate": 7.22124670763828e-06,
      "loss": 0.0,
      "step": 5847
    },
    {
      "epoch": 1.7114427860696517,
      "grad_norm": 0.00359273049980402,
      "learning_rate": 7.213930348258708e-06,
      "loss": 0.0001,
      "step": 5848
    },
    {
      "epoch": 1.7117354404448346,
      "grad_norm": 0.002152426866814494,
      "learning_rate": 7.206613988879134e-06,
      "loss": 0.0,
      "step": 5849
    },
    {
      "epoch": 1.7120280948200177,
      "grad_norm": 2.712299108505249,
      "learning_rate": 7.199297629499562e-06,
      "loss": 0.1384,
      "step": 5850
    },
    {
      "epoch": 1.7123207491952006,
      "grad_norm": 0.0023311313707381487,
      "learning_rate": 7.191981270119989e-06,
      "loss": 0.0,
      "step": 5851
    },
    {
      "epoch": 1.7126134035703835,
      "grad_norm": 0.01575295813381672,
      "learning_rate": 7.184664910740416e-06,
      "loss": 0.0003,
      "step": 5852
    },
    {
      "epoch": 1.7129060579455664,
      "grad_norm": 0.002103086095303297,
      "learning_rate": 7.177348551360843e-06,
      "loss": 0.0,
      "step": 5853
    },
    {
      "epoch": 1.7131987123207493,
      "grad_norm": 0.004746034741401672,
      "learning_rate": 7.17003219198127e-06,
      "loss": 0.0001,
      "step": 5854
    },
    {
      "epoch": 1.7134913666959322,
      "grad_norm": 0.009943942539393902,
      "learning_rate": 7.162715832601697e-06,
      "loss": 0.0001,
      "step": 5855
    },
    {
      "epoch": 1.713784021071115,
      "grad_norm": 0.008769072592258453,
      "learning_rate": 7.155399473222125e-06,
      "loss": 0.0001,
      "step": 5856
    },
    {
      "epoch": 1.714076675446298,
      "grad_norm": 0.03581611439585686,
      "learning_rate": 7.1480831138425516e-06,
      "loss": 0.0005,
      "step": 5857
    },
    {
      "epoch": 1.7143693298214808,
      "grad_norm": 0.007830784656107426,
      "learning_rate": 7.1407667544629795e-06,
      "loss": 0.0001,
      "step": 5858
    },
    {
      "epoch": 1.7146619841966637,
      "grad_norm": 0.012227417901158333,
      "learning_rate": 7.1334503950834075e-06,
      "loss": 0.0002,
      "step": 5859
    },
    {
      "epoch": 1.7149546385718466,
      "grad_norm": 0.013806314207613468,
      "learning_rate": 7.126134035703834e-06,
      "loss": 0.0002,
      "step": 5860
    },
    {
      "epoch": 1.7152472929470295,
      "grad_norm": 0.005988314747810364,
      "learning_rate": 7.118817676324262e-06,
      "loss": 0.0001,
      "step": 5861
    },
    {
      "epoch": 1.7155399473222124,
      "grad_norm": 0.00835619680583477,
      "learning_rate": 7.111501316944688e-06,
      "loss": 0.0001,
      "step": 5862
    },
    {
      "epoch": 1.7158326016973953,
      "grad_norm": 0.1585269570350647,
      "learning_rate": 7.104184957565116e-06,
      "loss": 0.0017,
      "step": 5863
    },
    {
      "epoch": 1.7161252560725782,
      "grad_norm": 0.00829259678721428,
      "learning_rate": 7.096868598185544e-06,
      "loss": 0.0001,
      "step": 5864
    },
    {
      "epoch": 1.716417910447761,
      "grad_norm": 0.024783294647932053,
      "learning_rate": 7.08955223880597e-06,
      "loss": 0.0004,
      "step": 5865
    },
    {
      "epoch": 1.7167105648229442,
      "grad_norm": 0.012745466083288193,
      "learning_rate": 7.082235879426398e-06,
      "loss": 0.0002,
      "step": 5866
    },
    {
      "epoch": 1.717003219198127,
      "grad_norm": 0.010452482849359512,
      "learning_rate": 7.074919520046824e-06,
      "loss": 0.0001,
      "step": 5867
    },
    {
      "epoch": 1.71729587357331,
      "grad_norm": 0.020907893776893616,
      "learning_rate": 7.067603160667252e-06,
      "loss": 0.0003,
      "step": 5868
    },
    {
      "epoch": 1.7175885279484928,
      "grad_norm": 0.007186241913586855,
      "learning_rate": 7.06028680128768e-06,
      "loss": 0.0001,
      "step": 5869
    },
    {
      "epoch": 1.7178811823236757,
      "grad_norm": 0.011177362874150276,
      "learning_rate": 7.0529704419081064e-06,
      "loss": 0.0001,
      "step": 5870
    },
    {
      "epoch": 1.7181738366988588,
      "grad_norm": 0.06881462782621384,
      "learning_rate": 7.045654082528534e-06,
      "loss": 0.0009,
      "step": 5871
    },
    {
      "epoch": 1.7184664910740417,
      "grad_norm": 0.025284336879849434,
      "learning_rate": 7.038337723148962e-06,
      "loss": 0.0004,
      "step": 5872
    },
    {
      "epoch": 1.7187591454492246,
      "grad_norm": 0.0042735980823636055,
      "learning_rate": 7.031021363769389e-06,
      "loss": 0.0001,
      "step": 5873
    },
    {
      "epoch": 1.7190517998244075,
      "grad_norm": 0.004070568364113569,
      "learning_rate": 7.0237050043898166e-06,
      "loss": 0.0001,
      "step": 5874
    },
    {
      "epoch": 1.7193444541995904,
      "grad_norm": 0.01514324638992548,
      "learning_rate": 7.016388645010243e-06,
      "loss": 0.0002,
      "step": 5875
    },
    {
      "epoch": 1.7196371085747733,
      "grad_norm": 0.01873566210269928,
      "learning_rate": 7.009072285630671e-06,
      "loss": 0.0002,
      "step": 5876
    },
    {
      "epoch": 1.7199297629499561,
      "grad_norm": 0.02724928967654705,
      "learning_rate": 7.001755926251098e-06,
      "loss": 0.0004,
      "step": 5877
    },
    {
      "epoch": 1.720222417325139,
      "grad_norm": 0.046195484697818756,
      "learning_rate": 6.994439566871525e-06,
      "loss": 0.0005,
      "step": 5878
    },
    {
      "epoch": 1.720515071700322,
      "grad_norm": 0.47184377908706665,
      "learning_rate": 6.987123207491952e-06,
      "loss": 0.0064,
      "step": 5879
    },
    {
      "epoch": 1.7208077260755048,
      "grad_norm": 0.022266795858740807,
      "learning_rate": 6.97980684811238e-06,
      "loss": 0.0003,
      "step": 5880
    },
    {
      "epoch": 1.7211003804506877,
      "grad_norm": 0.01935538277029991,
      "learning_rate": 6.972490488732806e-06,
      "loss": 0.0003,
      "step": 5881
    },
    {
      "epoch": 1.7213930348258706,
      "grad_norm": 0.004155569709837437,
      "learning_rate": 6.965174129353234e-06,
      "loss": 0.0001,
      "step": 5882
    },
    {
      "epoch": 1.7216856892010535,
      "grad_norm": 0.004147347528487444,
      "learning_rate": 6.9578577699736605e-06,
      "loss": 0.0001,
      "step": 5883
    },
    {
      "epoch": 1.7219783435762364,
      "grad_norm": 0.00926827359944582,
      "learning_rate": 6.9505414105940884e-06,
      "loss": 0.0001,
      "step": 5884
    },
    {
      "epoch": 1.7222709979514192,
      "grad_norm": 0.013978640548884869,
      "learning_rate": 6.943225051214516e-06,
      "loss": 0.0003,
      "step": 5885
    },
    {
      "epoch": 1.7225636523266021,
      "grad_norm": 0.0015110508538782597,
      "learning_rate": 6.935908691834943e-06,
      "loss": 0.0,
      "step": 5886
    },
    {
      "epoch": 1.7228563067017852,
      "grad_norm": 0.0021568655502051115,
      "learning_rate": 6.928592332455371e-06,
      "loss": 0.0,
      "step": 5887
    },
    {
      "epoch": 1.7231489610769681,
      "grad_norm": 0.002588055795058608,
      "learning_rate": 6.9212759730757985e-06,
      "loss": 0.0,
      "step": 5888
    },
    {
      "epoch": 1.723441615452151,
      "grad_norm": 0.003095940686762333,
      "learning_rate": 6.913959613696225e-06,
      "loss": 0.0001,
      "step": 5889
    },
    {
      "epoch": 1.723734269827334,
      "grad_norm": 0.0008062701090238988,
      "learning_rate": 6.906643254316653e-06,
      "loss": 0.0,
      "step": 5890
    },
    {
      "epoch": 1.7240269242025168,
      "grad_norm": 0.0026606128085404634,
      "learning_rate": 6.899326894937079e-06,
      "loss": 0.0001,
      "step": 5891
    },
    {
      "epoch": 1.7243195785776997,
      "grad_norm": 0.0003834792005363852,
      "learning_rate": 6.892010535557507e-06,
      "loss": 0.0,
      "step": 5892
    },
    {
      "epoch": 1.7246122329528828,
      "grad_norm": 0.004114055074751377,
      "learning_rate": 6.884694176177935e-06,
      "loss": 0.0001,
      "step": 5893
    },
    {
      "epoch": 1.7249048873280657,
      "grad_norm": 0.0007469934644177556,
      "learning_rate": 6.877377816798361e-06,
      "loss": 0.0,
      "step": 5894
    },
    {
      "epoch": 1.7251975417032486,
      "grad_norm": 0.0007380041643045843,
      "learning_rate": 6.870061457418789e-06,
      "loss": 0.0,
      "step": 5895
    },
    {
      "epoch": 1.7254901960784315,
      "grad_norm": 0.0008967572939582169,
      "learning_rate": 6.862745098039216e-06,
      "loss": 0.0,
      "step": 5896
    },
    {
      "epoch": 1.7257828504536143,
      "grad_norm": 0.000646058761049062,
      "learning_rate": 6.855428738659643e-06,
      "loss": 0.0,
      "step": 5897
    },
    {
      "epoch": 1.7260755048287972,
      "grad_norm": 0.0005116090178489685,
      "learning_rate": 6.84811237928007e-06,
      "loss": 0.0,
      "step": 5898
    },
    {
      "epoch": 1.7263681592039801,
      "grad_norm": 0.003847175743430853,
      "learning_rate": 6.8407960199004975e-06,
      "loss": 0.0001,
      "step": 5899
    },
    {
      "epoch": 1.726660813579163,
      "grad_norm": 0.0018641845090314746,
      "learning_rate": 6.833479660520925e-06,
      "loss": 0.0,
      "step": 5900
    },
    {
      "epoch": 1.726953467954346,
      "grad_norm": 0.0018206120003014803,
      "learning_rate": 6.826163301141353e-06,
      "loss": 0.0,
      "step": 5901
    },
    {
      "epoch": 1.7272461223295288,
      "grad_norm": 0.0013743533054366708,
      "learning_rate": 6.818846941761779e-06,
      "loss": 0.0,
      "step": 5902
    },
    {
      "epoch": 1.7275387767047117,
      "grad_norm": 0.0014163124142214656,
      "learning_rate": 6.811530582382207e-06,
      "loss": 0.0,
      "step": 5903
    },
    {
      "epoch": 1.7278314310798946,
      "grad_norm": 0.0029564399737864733,
      "learning_rate": 6.804214223002635e-06,
      "loss": 0.0,
      "step": 5904
    },
    {
      "epoch": 1.7281240854550775,
      "grad_norm": 0.001355087966658175,
      "learning_rate": 6.796897863623061e-06,
      "loss": 0.0,
      "step": 5905
    },
    {
      "epoch": 1.7284167398302603,
      "grad_norm": 0.00047083597746677697,
      "learning_rate": 6.789581504243489e-06,
      "loss": 0.0,
      "step": 5906
    },
    {
      "epoch": 1.7287093942054432,
      "grad_norm": 0.0031178754288703203,
      "learning_rate": 6.782265144863915e-06,
      "loss": 0.0001,
      "step": 5907
    },
    {
      "epoch": 1.7290020485806261,
      "grad_norm": 0.0011868373258039355,
      "learning_rate": 6.774948785484343e-06,
      "loss": 0.0,
      "step": 5908
    },
    {
      "epoch": 1.7292947029558092,
      "grad_norm": 0.003698865883052349,
      "learning_rate": 6.767632426104771e-06,
      "loss": 0.0001,
      "step": 5909
    },
    {
      "epoch": 1.7295873573309921,
      "grad_norm": 0.0058182161301374435,
      "learning_rate": 6.760316066725197e-06,
      "loss": 0.0001,
      "step": 5910
    },
    {
      "epoch": 1.729880011706175,
      "grad_norm": 0.00122758187353611,
      "learning_rate": 6.752999707345625e-06,
      "loss": 0.0,
      "step": 5911
    },
    {
      "epoch": 1.730172666081358,
      "grad_norm": 0.0046470691449940205,
      "learning_rate": 6.745683347966053e-06,
      "loss": 0.0001,
      "step": 5912
    },
    {
      "epoch": 1.7304653204565408,
      "grad_norm": 0.000916247081477195,
      "learning_rate": 6.7383669885864795e-06,
      "loss": 0.0,
      "step": 5913
    },
    {
      "epoch": 1.7307579748317239,
      "grad_norm": 0.008241435512900352,
      "learning_rate": 6.7310506292069075e-06,
      "loss": 0.0001,
      "step": 5914
    },
    {
      "epoch": 1.7310506292069068,
      "grad_norm": 0.0019211042672395706,
      "learning_rate": 6.723734269827334e-06,
      "loss": 0.0,
      "step": 5915
    },
    {
      "epoch": 1.7313432835820897,
      "grad_norm": 0.0039780582301318645,
      "learning_rate": 6.716417910447762e-06,
      "loss": 0.0001,
      "step": 5916
    },
    {
      "epoch": 1.7316359379572726,
      "grad_norm": 0.0008696449222043157,
      "learning_rate": 6.70910155106819e-06,
      "loss": 0.0,
      "step": 5917
    },
    {
      "epoch": 1.7319285923324554,
      "grad_norm": 0.004171712789684534,
      "learning_rate": 6.701785191688616e-06,
      "loss": 0.0001,
      "step": 5918
    },
    {
      "epoch": 1.7322212467076383,
      "grad_norm": 0.000716442649718374,
      "learning_rate": 6.694468832309044e-06,
      "loss": 0.0,
      "step": 5919
    },
    {
      "epoch": 1.7325139010828212,
      "grad_norm": 0.0023232263047248125,
      "learning_rate": 6.687152472929471e-06,
      "loss": 0.0,
      "step": 5920
    },
    {
      "epoch": 1.732806555458004,
      "grad_norm": 0.0019374418770894408,
      "learning_rate": 6.679836113549898e-06,
      "loss": 0.0,
      "step": 5921
    },
    {
      "epoch": 1.733099209833187,
      "grad_norm": 0.001732157776132226,
      "learning_rate": 6.672519754170325e-06,
      "loss": 0.0,
      "step": 5922
    },
    {
      "epoch": 1.7333918642083699,
      "grad_norm": 0.0008150420617312193,
      "learning_rate": 6.665203394790752e-06,
      "loss": 0.0,
      "step": 5923
    },
    {
      "epoch": 1.7336845185835528,
      "grad_norm": 0.000821317604277283,
      "learning_rate": 6.657887035411179e-06,
      "loss": 0.0,
      "step": 5924
    },
    {
      "epoch": 1.7339771729587357,
      "grad_norm": 0.0010837716981768608,
      "learning_rate": 6.650570676031607e-06,
      "loss": 0.0,
      "step": 5925
    },
    {
      "epoch": 1.7342698273339185,
      "grad_norm": 0.0008237934089265764,
      "learning_rate": 6.6432543166520335e-06,
      "loss": 0.0,
      "step": 5926
    },
    {
      "epoch": 1.7345624817091014,
      "grad_norm": 0.000993865542113781,
      "learning_rate": 6.6359379572724615e-06,
      "loss": 0.0,
      "step": 5927
    },
    {
      "epoch": 1.7348551360842843,
      "grad_norm": 0.0005446638097055256,
      "learning_rate": 6.6286215978928894e-06,
      "loss": 0.0,
      "step": 5928
    },
    {
      "epoch": 1.7351477904594672,
      "grad_norm": 0.00519817229360342,
      "learning_rate": 6.621305238513316e-06,
      "loss": 0.0001,
      "step": 5929
    },
    {
      "epoch": 1.7354404448346503,
      "grad_norm": 0.0009001668076962233,
      "learning_rate": 6.613988879133744e-06,
      "loss": 0.0,
      "step": 5930
    },
    {
      "epoch": 1.7357330992098332,
      "grad_norm": 0.0030989614315330982,
      "learning_rate": 6.60667251975417e-06,
      "loss": 0.0001,
      "step": 5931
    },
    {
      "epoch": 1.736025753585016,
      "grad_norm": 0.0009251827141270041,
      "learning_rate": 6.599356160374598e-06,
      "loss": 0.0,
      "step": 5932
    },
    {
      "epoch": 1.736318407960199,
      "grad_norm": 0.0012371689081192017,
      "learning_rate": 6.592039800995026e-06,
      "loss": 0.0,
      "step": 5933
    },
    {
      "epoch": 1.7366110623353819,
      "grad_norm": 0.0036600898019969463,
      "learning_rate": 6.584723441615452e-06,
      "loss": 0.0001,
      "step": 5934
    },
    {
      "epoch": 1.736903716710565,
      "grad_norm": 0.0002120627905242145,
      "learning_rate": 6.57740708223588e-06,
      "loss": 0.0,
      "step": 5935
    },
    {
      "epoch": 1.7371963710857479,
      "grad_norm": 0.002093351213261485,
      "learning_rate": 6.570090722856306e-06,
      "loss": 0.0,
      "step": 5936
    },
    {
      "epoch": 1.7374890254609308,
      "grad_norm": 0.0016823607729747891,
      "learning_rate": 6.562774363476734e-06,
      "loss": 0.0,
      "step": 5937
    },
    {
      "epoch": 1.7377816798361136,
      "grad_norm": 0.0007766742492094636,
      "learning_rate": 6.555458004097162e-06,
      "loss": 0.0,
      "step": 5938
    },
    {
      "epoch": 1.7380743342112965,
      "grad_norm": 0.0010464844526723027,
      "learning_rate": 6.5481416447175884e-06,
      "loss": 0.0,
      "step": 5939
    },
    {
      "epoch": 1.7383669885864794,
      "grad_norm": 0.005947027821093798,
      "learning_rate": 6.540825285338016e-06,
      "loss": 0.0001,
      "step": 5940
    },
    {
      "epoch": 1.7386596429616623,
      "grad_norm": 0.000575781858060509,
      "learning_rate": 6.533508925958444e-06,
      "loss": 0.0,
      "step": 5941
    },
    {
      "epoch": 1.7389522973368452,
      "grad_norm": 0.0007926999824121594,
      "learning_rate": 6.526192566578871e-06,
      "loss": 0.0,
      "step": 5942
    },
    {
      "epoch": 1.739244951712028,
      "grad_norm": 0.0013060495257377625,
      "learning_rate": 6.5188762071992985e-06,
      "loss": 0.0,
      "step": 5943
    },
    {
      "epoch": 1.739537606087211,
      "grad_norm": 0.0033319902140647173,
      "learning_rate": 6.511559847819725e-06,
      "loss": 0.0001,
      "step": 5944
    },
    {
      "epoch": 1.7398302604623939,
      "grad_norm": 0.015158601105213165,
      "learning_rate": 6.504243488440153e-06,
      "loss": 0.0001,
      "step": 5945
    },
    {
      "epoch": 1.7401229148375768,
      "grad_norm": 0.0009031699155457318,
      "learning_rate": 6.49692712906058e-06,
      "loss": 0.0,
      "step": 5946
    },
    {
      "epoch": 1.7404155692127596,
      "grad_norm": 0.0016654676292091608,
      "learning_rate": 6.489610769681007e-06,
      "loss": 0.0,
      "step": 5947
    },
    {
      "epoch": 1.7407082235879425,
      "grad_norm": 0.0007055064779706299,
      "learning_rate": 6.482294410301434e-06,
      "loss": 0.0,
      "step": 5948
    },
    {
      "epoch": 1.7410008779631254,
      "grad_norm": 0.0005536130629479885,
      "learning_rate": 6.474978050921862e-06,
      "loss": 0.0,
      "step": 5949
    },
    {
      "epoch": 1.7412935323383083,
      "grad_norm": 0.0008756743045523763,
      "learning_rate": 6.467661691542288e-06,
      "loss": 0.0,
      "step": 5950
    },
    {
      "epoch": 1.7415861867134914,
      "grad_norm": 0.00022419232118409127,
      "learning_rate": 6.460345332162716e-06,
      "loss": 0.0,
      "step": 5951
    },
    {
      "epoch": 1.7418788410886743,
      "grad_norm": 0.001122329500503838,
      "learning_rate": 6.4530289727831425e-06,
      "loss": 0.0,
      "step": 5952
    },
    {
      "epoch": 1.7421714954638572,
      "grad_norm": 0.0014706572983413935,
      "learning_rate": 6.44571261340357e-06,
      "loss": 0.0,
      "step": 5953
    },
    {
      "epoch": 1.74246414983904,
      "grad_norm": 0.0018914340762421489,
      "learning_rate": 6.438396254023998e-06,
      "loss": 0.0,
      "step": 5954
    },
    {
      "epoch": 1.742756804214223,
      "grad_norm": 0.0010767308995127678,
      "learning_rate": 6.431079894644425e-06,
      "loss": 0.0,
      "step": 5955
    },
    {
      "epoch": 1.743049458589406,
      "grad_norm": 0.0013149684527888894,
      "learning_rate": 6.4237635352648526e-06,
      "loss": 0.0,
      "step": 5956
    },
    {
      "epoch": 1.743342112964589,
      "grad_norm": 0.004895191639661789,
      "learning_rate": 6.4164471758852805e-06,
      "loss": 0.0001,
      "step": 5957
    },
    {
      "epoch": 1.7436347673397719,
      "grad_norm": 0.0004786603385582566,
      "learning_rate": 6.409130816505707e-06,
      "loss": 0.0,
      "step": 5958
    },
    {
      "epoch": 1.7439274217149547,
      "grad_norm": 0.0014774624723941088,
      "learning_rate": 6.401814457126135e-06,
      "loss": 0.0,
      "step": 5959
    },
    {
      "epoch": 1.7442200760901376,
      "grad_norm": 0.10711546987295151,
      "learning_rate": 6.394498097746561e-06,
      "loss": 0.0009,
      "step": 5960
    },
    {
      "epoch": 1.7445127304653205,
      "grad_norm": 0.001412463141605258,
      "learning_rate": 6.387181738366989e-06,
      "loss": 0.0,
      "step": 5961
    },
    {
      "epoch": 1.7448053848405034,
      "grad_norm": 0.0007251347415149212,
      "learning_rate": 6.379865378987417e-06,
      "loss": 0.0,
      "step": 5962
    },
    {
      "epoch": 1.7450980392156863,
      "grad_norm": 0.0013520853826776147,
      "learning_rate": 6.372549019607843e-06,
      "loss": 0.0,
      "step": 5963
    },
    {
      "epoch": 1.7453906935908692,
      "grad_norm": 0.0023613956291228533,
      "learning_rate": 6.365232660228271e-06,
      "loss": 0.0001,
      "step": 5964
    },
    {
      "epoch": 1.745683347966052,
      "grad_norm": 0.0010315789841115475,
      "learning_rate": 6.357916300848698e-06,
      "loss": 0.0,
      "step": 5965
    },
    {
      "epoch": 1.745976002341235,
      "grad_norm": 0.00086397078121081,
      "learning_rate": 6.350599941469125e-06,
      "loss": 0.0,
      "step": 5966
    },
    {
      "epoch": 1.7462686567164178,
      "grad_norm": 0.0010542155941948295,
      "learning_rate": 6.343283582089552e-06,
      "loss": 0.0,
      "step": 5967
    },
    {
      "epoch": 1.7465613110916007,
      "grad_norm": 0.04119636490941048,
      "learning_rate": 6.3359672227099795e-06,
      "loss": 0.0002,
      "step": 5968
    },
    {
      "epoch": 1.7468539654667836,
      "grad_norm": 0.0012462715385481715,
      "learning_rate": 6.328650863330407e-06,
      "loss": 0.0,
      "step": 5969
    },
    {
      "epoch": 1.7471466198419665,
      "grad_norm": 0.00031741769635118544,
      "learning_rate": 6.3213345039508346e-06,
      "loss": 0.0,
      "step": 5970
    },
    {
      "epoch": 1.7474392742171494,
      "grad_norm": 0.0022417607251554728,
      "learning_rate": 6.314018144571261e-06,
      "loss": 0.0,
      "step": 5971
    },
    {
      "epoch": 1.7477319285923325,
      "grad_norm": 0.0006855145911686122,
      "learning_rate": 6.306701785191689e-06,
      "loss": 0.0,
      "step": 5972
    },
    {
      "epoch": 1.7480245829675154,
      "grad_norm": 0.01104377955198288,
      "learning_rate": 6.299385425812117e-06,
      "loss": 0.0002,
      "step": 5973
    },
    {
      "epoch": 1.7483172373426983,
      "grad_norm": 0.0005324244848452508,
      "learning_rate": 6.292069066432543e-06,
      "loss": 0.0,
      "step": 5974
    },
    {
      "epoch": 1.7486098917178812,
      "grad_norm": 0.0008127905894070864,
      "learning_rate": 6.284752707052971e-06,
      "loss": 0.0,
      "step": 5975
    },
    {
      "epoch": 1.748902546093064,
      "grad_norm": 0.0008344686939381063,
      "learning_rate": 6.277436347673397e-06,
      "loss": 0.0,
      "step": 5976
    },
    {
      "epoch": 1.749195200468247,
      "grad_norm": 0.0004917003680020571,
      "learning_rate": 6.270119988293825e-06,
      "loss": 0.0,
      "step": 5977
    },
    {
      "epoch": 1.74948785484343,
      "grad_norm": 0.0009864646708592772,
      "learning_rate": 6.262803628914253e-06,
      "loss": 0.0,
      "step": 5978
    },
    {
      "epoch": 1.749780509218613,
      "grad_norm": 0.0019904731307178736,
      "learning_rate": 6.255487269534679e-06,
      "loss": 0.0,
      "step": 5979
    },
    {
      "epoch": 1.7500731635937958,
      "grad_norm": 0.006802576594054699,
      "learning_rate": 6.248170910155107e-06,
      "loss": 0.0001,
      "step": 5980
    },
    {
      "epoch": 1.7503658179689787,
      "grad_norm": 0.000930826470721513,
      "learning_rate": 6.240854550775534e-06,
      "loss": 0.0,
      "step": 5981
    },
    {
      "epoch": 1.7506584723441616,
      "grad_norm": 0.0014126853784546256,
      "learning_rate": 6.2335381913959615e-06,
      "loss": 0.0,
      "step": 5982
    },
    {
      "epoch": 1.7509511267193445,
      "grad_norm": 0.0048756287433207035,
      "learning_rate": 6.2262218320163894e-06,
      "loss": 0.0,
      "step": 5983
    },
    {
      "epoch": 1.7512437810945274,
      "grad_norm": 0.0008603222668170929,
      "learning_rate": 6.2189054726368165e-06,
      "loss": 0.0,
      "step": 5984
    },
    {
      "epoch": 1.7515364354697103,
      "grad_norm": 0.006025762762874365,
      "learning_rate": 6.211589113257244e-06,
      "loss": 0.0001,
      "step": 5985
    },
    {
      "epoch": 1.7518290898448932,
      "grad_norm": 0.0022837838623672724,
      "learning_rate": 6.204272753877671e-06,
      "loss": 0.0,
      "step": 5986
    },
    {
      "epoch": 1.752121744220076,
      "grad_norm": 0.0006897895946167409,
      "learning_rate": 6.196956394498099e-06,
      "loss": 0.0,
      "step": 5987
    },
    {
      "epoch": 1.752414398595259,
      "grad_norm": 0.0007595543283969164,
      "learning_rate": 6.189640035118526e-06,
      "loss": 0.0,
      "step": 5988
    },
    {
      "epoch": 1.7527070529704418,
      "grad_norm": 0.0010641933185979724,
      "learning_rate": 6.182323675738953e-06,
      "loss": 0.0,
      "step": 5989
    },
    {
      "epoch": 1.7529997073456247,
      "grad_norm": 0.000681478064507246,
      "learning_rate": 6.17500731635938e-06,
      "loss": 0.0,
      "step": 5990
    },
    {
      "epoch": 1.7532923617208076,
      "grad_norm": 0.0002897112863138318,
      "learning_rate": 6.167690956979807e-06,
      "loss": 0.0,
      "step": 5991
    },
    {
      "epoch": 1.7535850160959905,
      "grad_norm": 0.0010328495409339666,
      "learning_rate": 6.160374597600234e-06,
      "loss": 0.0,
      "step": 5992
    },
    {
      "epoch": 1.7538776704711734,
      "grad_norm": 0.0010134896729141474,
      "learning_rate": 6.153058238220661e-06,
      "loss": 0.0,
      "step": 5993
    },
    {
      "epoch": 1.7541703248463565,
      "grad_norm": 0.006686047185212374,
      "learning_rate": 6.1457418788410884e-06,
      "loss": 0.0001,
      "step": 5994
    },
    {
      "epoch": 1.7544629792215394,
      "grad_norm": 0.0012931345263496041,
      "learning_rate": 6.138425519461516e-06,
      "loss": 0.0,
      "step": 5995
    },
    {
      "epoch": 1.7547556335967223,
      "grad_norm": 0.0007904611411504447,
      "learning_rate": 6.1311091600819435e-06,
      "loss": 0.0,
      "step": 5996
    },
    {
      "epoch": 1.7550482879719052,
      "grad_norm": 0.0005322484066709876,
      "learning_rate": 6.123792800702371e-06,
      "loss": 0.0,
      "step": 5997
    },
    {
      "epoch": 1.755340942347088,
      "grad_norm": 0.0006279582157731056,
      "learning_rate": 6.116476441322798e-06,
      "loss": 0.0,
      "step": 5998
    },
    {
      "epoch": 1.7556335967222712,
      "grad_norm": 0.0011968952603638172,
      "learning_rate": 6.109160081943225e-06,
      "loss": 0.0,
      "step": 5999
    },
    {
      "epoch": 1.755926251097454,
      "grad_norm": 0.011229063384234905,
      "learning_rate": 6.101843722563653e-06,
      "loss": 0.0001,
      "step": 6000
    },
    {
      "epoch": 1.756218905472637,
      "grad_norm": 0.0007652883650735021,
      "learning_rate": 6.09452736318408e-06,
      "loss": 0.0,
      "step": 6001
    },
    {
      "epoch": 1.7565115598478198,
      "grad_norm": 0.0005294579896144569,
      "learning_rate": 6.087211003804507e-06,
      "loss": 0.0,
      "step": 6002
    },
    {
      "epoch": 1.7568042142230027,
      "grad_norm": 0.0006569406250491738,
      "learning_rate": 6.079894644424934e-06,
      "loss": 0.0,
      "step": 6003
    },
    {
      "epoch": 1.7570968685981856,
      "grad_norm": 0.007408336736261845,
      "learning_rate": 6.072578285045362e-06,
      "loss": 0.0001,
      "step": 6004
    },
    {
      "epoch": 1.7573895229733685,
      "grad_norm": 0.0008690960239619017,
      "learning_rate": 6.065261925665789e-06,
      "loss": 0.0,
      "step": 6005
    },
    {
      "epoch": 1.7576821773485514,
      "grad_norm": 0.0007068330887705088,
      "learning_rate": 6.057945566286216e-06,
      "loss": 0.0,
      "step": 6006
    },
    {
      "epoch": 1.7579748317237343,
      "grad_norm": 0.005987859331071377,
      "learning_rate": 6.050629206906643e-06,
      "loss": 0.0001,
      "step": 6007
    },
    {
      "epoch": 1.7582674860989171,
      "grad_norm": 0.0012825940502807498,
      "learning_rate": 6.043312847527071e-06,
      "loss": 0.0,
      "step": 6008
    },
    {
      "epoch": 1.7585601404741,
      "grad_norm": 0.0008381876978091896,
      "learning_rate": 6.035996488147498e-06,
      "loss": 0.0,
      "step": 6009
    },
    {
      "epoch": 1.758852794849283,
      "grad_norm": 0.0004752900858875364,
      "learning_rate": 6.0286801287679255e-06,
      "loss": 0.0,
      "step": 6010
    },
    {
      "epoch": 1.7591454492244658,
      "grad_norm": 0.0021880939602851868,
      "learning_rate": 6.0213637693883526e-06,
      "loss": 0.0,
      "step": 6011
    },
    {
      "epoch": 1.7594381035996487,
      "grad_norm": 0.000659592158626765,
      "learning_rate": 6.0140474100087805e-06,
      "loss": 0.0,
      "step": 6012
    },
    {
      "epoch": 1.7597307579748316,
      "grad_norm": 0.001379102817736566,
      "learning_rate": 6.006731050629208e-06,
      "loss": 0.0,
      "step": 6013
    },
    {
      "epoch": 1.7600234123500145,
      "grad_norm": 0.0027575218118727207,
      "learning_rate": 5.999414691249635e-06,
      "loss": 0.0,
      "step": 6014
    },
    {
      "epoch": 1.7603160667251976,
      "grad_norm": 0.0003732953919097781,
      "learning_rate": 5.992098331870062e-06,
      "loss": 0.0,
      "step": 6015
    },
    {
      "epoch": 1.7606087211003805,
      "grad_norm": 0.0004398910969030112,
      "learning_rate": 5.984781972490489e-06,
      "loss": 0.0,
      "step": 6016
    },
    {
      "epoch": 1.7609013754755634,
      "grad_norm": 0.0027367311995476484,
      "learning_rate": 5.977465613110916e-06,
      "loss": 0.0,
      "step": 6017
    },
    {
      "epoch": 1.7611940298507462,
      "grad_norm": 0.0009312551701441407,
      "learning_rate": 5.970149253731343e-06,
      "loss": 0.0,
      "step": 6018
    },
    {
      "epoch": 1.7614866842259291,
      "grad_norm": 0.00025712751084938645,
      "learning_rate": 5.96283289435177e-06,
      "loss": 0.0,
      "step": 6019
    },
    {
      "epoch": 1.7617793386011122,
      "grad_norm": 0.0007998314686119556,
      "learning_rate": 5.955516534972198e-06,
      "loss": 0.0,
      "step": 6020
    },
    {
      "epoch": 1.7620719929762951,
      "grad_norm": 1.298336148262024,
      "learning_rate": 5.948200175592625e-06,
      "loss": 0.0054,
      "step": 6021
    },
    {
      "epoch": 1.762364647351478,
      "grad_norm": 0.0007662497810088098,
      "learning_rate": 5.940883816213052e-06,
      "loss": 0.0,
      "step": 6022
    },
    {
      "epoch": 1.762657301726661,
      "grad_norm": 0.00021636112069245428,
      "learning_rate": 5.9335674568334795e-06,
      "loss": 0.0,
      "step": 6023
    },
    {
      "epoch": 1.7629499561018438,
      "grad_norm": 0.0003535682044457644,
      "learning_rate": 5.9262510974539075e-06,
      "loss": 0.0,
      "step": 6024
    },
    {
      "epoch": 1.7632426104770267,
      "grad_norm": 0.0011517098173499107,
      "learning_rate": 5.9189347380743346e-06,
      "loss": 0.0,
      "step": 6025
    },
    {
      "epoch": 1.7635352648522096,
      "grad_norm": 0.0010904577793553472,
      "learning_rate": 5.911618378694762e-06,
      "loss": 0.0,
      "step": 6026
    },
    {
      "epoch": 1.7638279192273925,
      "grad_norm": 0.0008237168658524752,
      "learning_rate": 5.904302019315189e-06,
      "loss": 0.0,
      "step": 6027
    },
    {
      "epoch": 1.7641205736025753,
      "grad_norm": 0.0005817033816128969,
      "learning_rate": 5.896985659935617e-06,
      "loss": 0.0,
      "step": 6028
    },
    {
      "epoch": 1.7644132279777582,
      "grad_norm": 0.0004842921916861087,
      "learning_rate": 5.889669300556044e-06,
      "loss": 0.0,
      "step": 6029
    },
    {
      "epoch": 1.7647058823529411,
      "grad_norm": 0.0006301872199401259,
      "learning_rate": 5.882352941176471e-06,
      "loss": 0.0,
      "step": 6030
    },
    {
      "epoch": 1.764998536728124,
      "grad_norm": 0.005268681328743696,
      "learning_rate": 5.875036581796898e-06,
      "loss": 0.0001,
      "step": 6031
    },
    {
      "epoch": 1.765291191103307,
      "grad_norm": 0.001128269243054092,
      "learning_rate": 5.867720222417326e-06,
      "loss": 0.0,
      "step": 6032
    },
    {
      "epoch": 1.7655838454784898,
      "grad_norm": 0.0004982168320566416,
      "learning_rate": 5.860403863037753e-06,
      "loss": 0.0,
      "step": 6033
    },
    {
      "epoch": 1.7658764998536727,
      "grad_norm": 0.0007496103644371033,
      "learning_rate": 5.85308750365818e-06,
      "loss": 0.0,
      "step": 6034
    },
    {
      "epoch": 1.7661691542288556,
      "grad_norm": 0.0005136104300618172,
      "learning_rate": 5.845771144278607e-06,
      "loss": 0.0,
      "step": 6035
    },
    {
      "epoch": 1.7664618086040387,
      "grad_norm": 0.0015976725844666362,
      "learning_rate": 5.838454784899034e-06,
      "loss": 0.0,
      "step": 6036
    },
    {
      "epoch": 1.7667544629792216,
      "grad_norm": 0.0012290143640711904,
      "learning_rate": 5.8311384255194615e-06,
      "loss": 0.0,
      "step": 6037
    },
    {
      "epoch": 1.7670471173544045,
      "grad_norm": 0.00034109456464648247,
      "learning_rate": 5.823822066139889e-06,
      "loss": 0.0,
      "step": 6038
    },
    {
      "epoch": 1.7673397717295873,
      "grad_norm": 0.020967284217476845,
      "learning_rate": 5.816505706760316e-06,
      "loss": 0.0002,
      "step": 6039
    },
    {
      "epoch": 1.7676324261047702,
      "grad_norm": 0.002137015573680401,
      "learning_rate": 5.809189347380744e-06,
      "loss": 0.0,
      "step": 6040
    },
    {
      "epoch": 1.7679250804799533,
      "grad_norm": 0.003661647904664278,
      "learning_rate": 5.801872988001171e-06,
      "loss": 0.0001,
      "step": 6041
    },
    {
      "epoch": 1.7682177348551362,
      "grad_norm": 0.010518140159547329,
      "learning_rate": 5.794556628621598e-06,
      "loss": 0.0001,
      "step": 6042
    },
    {
      "epoch": 1.7685103892303191,
      "grad_norm": 0.0016498948680236936,
      "learning_rate": 5.787240269242025e-06,
      "loss": 0.0,
      "step": 6043
    },
    {
      "epoch": 1.768803043605502,
      "grad_norm": 0.000788530393037945,
      "learning_rate": 5.779923909862453e-06,
      "loss": 0.0,
      "step": 6044
    },
    {
      "epoch": 1.769095697980685,
      "grad_norm": 0.0015095038106665015,
      "learning_rate": 5.77260755048288e-06,
      "loss": 0.0,
      "step": 6045
    },
    {
      "epoch": 1.7693883523558678,
      "grad_norm": 0.0013496472965925932,
      "learning_rate": 5.765291191103307e-06,
      "loss": 0.0,
      "step": 6046
    },
    {
      "epoch": 1.7696810067310507,
      "grad_norm": 1.13015878200531,
      "learning_rate": 5.757974831723734e-06,
      "loss": 0.0057,
      "step": 6047
    },
    {
      "epoch": 1.7699736611062336,
      "grad_norm": 0.36021360754966736,
      "learning_rate": 5.750658472344162e-06,
      "loss": 0.0013,
      "step": 6048
    },
    {
      "epoch": 1.7702663154814164,
      "grad_norm": 0.0015636446187272668,
      "learning_rate": 5.743342112964589e-06,
      "loss": 0.0,
      "step": 6049
    },
    {
      "epoch": 1.7705589698565993,
      "grad_norm": 0.0010189537424594164,
      "learning_rate": 5.736025753585016e-06,
      "loss": 0.0,
      "step": 6050
    },
    {
      "epoch": 1.7708516242317822,
      "grad_norm": 0.020609412342309952,
      "learning_rate": 5.7287093942054435e-06,
      "loss": 0.0002,
      "step": 6051
    },
    {
      "epoch": 1.771144278606965,
      "grad_norm": 0.0023420306388288736,
      "learning_rate": 5.7213930348258714e-06,
      "loss": 0.0,
      "step": 6052
    },
    {
      "epoch": 1.771436932982148,
      "grad_norm": 0.0005672802799381316,
      "learning_rate": 5.7140766754462985e-06,
      "loss": 0.0,
      "step": 6053
    },
    {
      "epoch": 1.7717295873573309,
      "grad_norm": 0.0013113165041431785,
      "learning_rate": 5.706760316066726e-06,
      "loss": 0.0,
      "step": 6054
    },
    {
      "epoch": 1.7720222417325138,
      "grad_norm": 0.004139709286391735,
      "learning_rate": 5.699443956687153e-06,
      "loss": 0.0001,
      "step": 6055
    },
    {
      "epoch": 1.7723148961076967,
      "grad_norm": 0.0018005968304350972,
      "learning_rate": 5.692127597307581e-06,
      "loss": 0.0,
      "step": 6056
    },
    {
      "epoch": 1.7726075504828798,
      "grad_norm": 0.0013284042943269014,
      "learning_rate": 5.684811237928008e-06,
      "loss": 0.0,
      "step": 6057
    },
    {
      "epoch": 1.7729002048580627,
      "grad_norm": 0.00022370753868017346,
      "learning_rate": 5.677494878548435e-06,
      "loss": 0.0,
      "step": 6058
    },
    {
      "epoch": 1.7731928592332455,
      "grad_norm": 0.00017913279589265585,
      "learning_rate": 5.670178519168862e-06,
      "loss": 0.0,
      "step": 6059
    },
    {
      "epoch": 1.7734855136084284,
      "grad_norm": 0.000413614819990471,
      "learning_rate": 5.662862159789289e-06,
      "loss": 0.0,
      "step": 6060
    },
    {
      "epoch": 1.7737781679836113,
      "grad_norm": 0.0003607451217249036,
      "learning_rate": 5.655545800409716e-06,
      "loss": 0.0,
      "step": 6061
    },
    {
      "epoch": 1.7740708223587944,
      "grad_norm": 0.00033977828570641577,
      "learning_rate": 5.648229441030143e-06,
      "loss": 0.0,
      "step": 6062
    },
    {
      "epoch": 1.7743634767339773,
      "grad_norm": 0.0004045834648422897,
      "learning_rate": 5.64091308165057e-06,
      "loss": 0.0,
      "step": 6063
    },
    {
      "epoch": 1.7746561311091602,
      "grad_norm": 0.0006311695324257016,
      "learning_rate": 5.6335967222709975e-06,
      "loss": 0.0,
      "step": 6064
    },
    {
      "epoch": 1.774948785484343,
      "grad_norm": 0.011869017034769058,
      "learning_rate": 5.6262803628914255e-06,
      "loss": 0.0001,
      "step": 6065
    },
    {
      "epoch": 1.775241439859526,
      "grad_norm": 0.003849781583994627,
      "learning_rate": 5.6189640035118526e-06,
      "loss": 0.0001,
      "step": 6066
    },
    {
      "epoch": 1.7755340942347089,
      "grad_norm": 0.008825070224702358,
      "learning_rate": 5.61164764413228e-06,
      "loss": 0.0001,
      "step": 6067
    },
    {
      "epoch": 1.7758267486098918,
      "grad_norm": 0.0005195000558160245,
      "learning_rate": 5.604331284752707e-06,
      "loss": 0.0,
      "step": 6068
    },
    {
      "epoch": 1.7761194029850746,
      "grad_norm": 0.0002385463158134371,
      "learning_rate": 5.597014925373135e-06,
      "loss": 0.0,
      "step": 6069
    },
    {
      "epoch": 1.7764120573602575,
      "grad_norm": 0.0003511181566864252,
      "learning_rate": 5.589698565993562e-06,
      "loss": 0.0,
      "step": 6070
    },
    {
      "epoch": 1.7767047117354404,
      "grad_norm": 0.000668007880449295,
      "learning_rate": 5.582382206613989e-06,
      "loss": 0.0,
      "step": 6071
    },
    {
      "epoch": 1.7769973661106233,
      "grad_norm": 0.001052291365340352,
      "learning_rate": 5.575065847234416e-06,
      "loss": 0.0,
      "step": 6072
    },
    {
      "epoch": 1.7772900204858062,
      "grad_norm": 0.0008030377794057131,
      "learning_rate": 5.567749487854844e-06,
      "loss": 0.0,
      "step": 6073
    },
    {
      "epoch": 1.777582674860989,
      "grad_norm": 0.0005521881976164877,
      "learning_rate": 5.560433128475271e-06,
      "loss": 0.0,
      "step": 6074
    },
    {
      "epoch": 1.777875329236172,
      "grad_norm": 0.0007169344462454319,
      "learning_rate": 5.553116769095698e-06,
      "loss": 0.0,
      "step": 6075
    },
    {
      "epoch": 1.7781679836113549,
      "grad_norm": 0.0018445259192958474,
      "learning_rate": 5.545800409716125e-06,
      "loss": 0.0,
      "step": 6076
    },
    {
      "epoch": 1.7784606379865378,
      "grad_norm": 0.009776068851351738,
      "learning_rate": 5.538484050336553e-06,
      "loss": 0.0001,
      "step": 6077
    },
    {
      "epoch": 1.7787532923617209,
      "grad_norm": 0.00043877839925698936,
      "learning_rate": 5.53116769095698e-06,
      "loss": 0.0,
      "step": 6078
    },
    {
      "epoch": 1.7790459467369037,
      "grad_norm": 0.0015850827330723405,
      "learning_rate": 5.5238513315774074e-06,
      "loss": 0.0,
      "step": 6079
    },
    {
      "epoch": 1.7793386011120866,
      "grad_norm": 13.653970718383789,
      "learning_rate": 5.5165349721978346e-06,
      "loss": 0.0361,
      "step": 6080
    },
    {
      "epoch": 1.7796312554872695,
      "grad_norm": 0.00023248694196809083,
      "learning_rate": 5.5092186128182625e-06,
      "loss": 0.0,
      "step": 6081
    },
    {
      "epoch": 1.7799239098624524,
      "grad_norm": 0.0009998355526477098,
      "learning_rate": 5.50190225343869e-06,
      "loss": 0.0,
      "step": 6082
    },
    {
      "epoch": 1.7802165642376353,
      "grad_norm": 0.0004500342474784702,
      "learning_rate": 5.494585894059117e-06,
      "loss": 0.0,
      "step": 6083
    },
    {
      "epoch": 1.7805092186128184,
      "grad_norm": 0.0004393111157696694,
      "learning_rate": 5.487269534679544e-06,
      "loss": 0.0,
      "step": 6084
    },
    {
      "epoch": 1.7808018729880013,
      "grad_norm": 0.0019799573346972466,
      "learning_rate": 5.479953175299971e-06,
      "loss": 0.0,
      "step": 6085
    },
    {
      "epoch": 1.7810945273631842,
      "grad_norm": 0.000697512470651418,
      "learning_rate": 5.472636815920398e-06,
      "loss": 0.0,
      "step": 6086
    },
    {
      "epoch": 1.781387181738367,
      "grad_norm": 0.000314964447170496,
      "learning_rate": 5.465320456540825e-06,
      "loss": 0.0,
      "step": 6087
    },
    {
      "epoch": 1.78167983611355,
      "grad_norm": 0.006685046944767237,
      "learning_rate": 5.458004097161252e-06,
      "loss": 0.0001,
      "step": 6088
    },
    {
      "epoch": 1.7819724904887329,
      "grad_norm": 0.0002795507898554206,
      "learning_rate": 5.45068773778168e-06,
      "loss": 0.0,
      "step": 6089
    },
    {
      "epoch": 1.7822651448639157,
      "grad_norm": 0.0012665147660300136,
      "learning_rate": 5.443371378402107e-06,
      "loss": 0.0,
      "step": 6090
    },
    {
      "epoch": 1.7825577992390986,
      "grad_norm": 0.00019367047934792936,
      "learning_rate": 5.436055019022534e-06,
      "loss": 0.0,
      "step": 6091
    },
    {
      "epoch": 1.7828504536142815,
      "grad_norm": 0.0001833774003898725,
      "learning_rate": 5.4287386596429615e-06,
      "loss": 0.0,
      "step": 6092
    },
    {
      "epoch": 1.7831431079894644,
      "grad_norm": 0.00014645690680481493,
      "learning_rate": 5.4214223002633894e-06,
      "loss": 0.0,
      "step": 6093
    },
    {
      "epoch": 1.7834357623646473,
      "grad_norm": 0.0003003651218023151,
      "learning_rate": 5.4141059408838165e-06,
      "loss": 0.0,
      "step": 6094
    },
    {
      "epoch": 1.7837284167398302,
      "grad_norm": 0.0004113362228963524,
      "learning_rate": 5.406789581504244e-06,
      "loss": 0.0,
      "step": 6095
    },
    {
      "epoch": 1.784021071115013,
      "grad_norm": 0.00032383357756771147,
      "learning_rate": 5.399473222124671e-06,
      "loss": 0.0,
      "step": 6096
    },
    {
      "epoch": 1.784313725490196,
      "grad_norm": 0.001158069702796638,
      "learning_rate": 5.392156862745099e-06,
      "loss": 0.0,
      "step": 6097
    },
    {
      "epoch": 1.7846063798653788,
      "grad_norm": 0.0012851745123043656,
      "learning_rate": 5.384840503365526e-06,
      "loss": 0.0,
      "step": 6098
    },
    {
      "epoch": 1.7848990342405617,
      "grad_norm": 0.00016551860608160496,
      "learning_rate": 5.377524143985953e-06,
      "loss": 0.0,
      "step": 6099
    },
    {
      "epoch": 1.7851916886157448,
      "grad_norm": 0.000494194682687521,
      "learning_rate": 5.37020778460638e-06,
      "loss": 0.0,
      "step": 6100
    },
    {
      "epoch": 1.7854843429909277,
      "grad_norm": 0.0003518579760566354,
      "learning_rate": 5.362891425226808e-06,
      "loss": 0.0,
      "step": 6101
    },
    {
      "epoch": 1.7857769973661106,
      "grad_norm": 0.0005160426953807473,
      "learning_rate": 5.355575065847235e-06,
      "loss": 0.0,
      "step": 6102
    },
    {
      "epoch": 1.7860696517412935,
      "grad_norm": 0.02960193157196045,
      "learning_rate": 5.348258706467662e-06,
      "loss": 0.0001,
      "step": 6103
    },
    {
      "epoch": 1.7863623061164764,
      "grad_norm": 0.0008992599323391914,
      "learning_rate": 5.340942347088089e-06,
      "loss": 0.0,
      "step": 6104
    },
    {
      "epoch": 1.7866549604916595,
      "grad_norm": 0.001342964475043118,
      "learning_rate": 5.333625987708516e-06,
      "loss": 0.0,
      "step": 6105
    },
    {
      "epoch": 1.7869476148668424,
      "grad_norm": 0.000340066704666242,
      "learning_rate": 5.3263096283289435e-06,
      "loss": 0.0,
      "step": 6106
    },
    {
      "epoch": 1.7872402692420253,
      "grad_norm": 0.0016259491676464677,
      "learning_rate": 5.318993268949371e-06,
      "loss": 0.0,
      "step": 6107
    },
    {
      "epoch": 1.7875329236172082,
      "grad_norm": 9.170470730168745e-05,
      "learning_rate": 5.3116769095697985e-06,
      "loss": 0.0,
      "step": 6108
    },
    {
      "epoch": 1.787825577992391,
      "grad_norm": 0.00018735171761363745,
      "learning_rate": 5.304360550190226e-06,
      "loss": 0.0,
      "step": 6109
    },
    {
      "epoch": 1.788118232367574,
      "grad_norm": 0.00010125528933713213,
      "learning_rate": 5.297044190810653e-06,
      "loss": 0.0,
      "step": 6110
    },
    {
      "epoch": 1.7884108867427568,
      "grad_norm": 0.00017634555115364492,
      "learning_rate": 5.28972783143108e-06,
      "loss": 0.0,
      "step": 6111
    },
    {
      "epoch": 1.7887035411179397,
      "grad_norm": 0.0003152966091874987,
      "learning_rate": 5.282411472051507e-06,
      "loss": 0.0,
      "step": 6112
    },
    {
      "epoch": 1.7889961954931226,
      "grad_norm": 8.96536948857829e-05,
      "learning_rate": 5.275095112671935e-06,
      "loss": 0.0,
      "step": 6113
    },
    {
      "epoch": 1.7892888498683055,
      "grad_norm": 0.0004922590451315045,
      "learning_rate": 5.267778753292362e-06,
      "loss": 0.0,
      "step": 6114
    },
    {
      "epoch": 1.7895815042434884,
      "grad_norm": 0.0004588527954183519,
      "learning_rate": 5.260462393912789e-06,
      "loss": 0.0,
      "step": 6115
    },
    {
      "epoch": 1.7898741586186713,
      "grad_norm": 0.0004414925933815539,
      "learning_rate": 5.253146034533216e-06,
      "loss": 0.0,
      "step": 6116
    },
    {
      "epoch": 1.7901668129938542,
      "grad_norm": 0.0002697826421353966,
      "learning_rate": 5.245829675153644e-06,
      "loss": 0.0,
      "step": 6117
    },
    {
      "epoch": 1.790459467369037,
      "grad_norm": 0.0016554900212213397,
      "learning_rate": 5.238513315774071e-06,
      "loss": 0.0,
      "step": 6118
    },
    {
      "epoch": 1.79075212174422,
      "grad_norm": 0.0006203555967658758,
      "learning_rate": 5.231196956394498e-06,
      "loss": 0.0,
      "step": 6119
    },
    {
      "epoch": 1.7910447761194028,
      "grad_norm": 0.0002844484697561711,
      "learning_rate": 5.2238805970149255e-06,
      "loss": 0.0,
      "step": 6120
    },
    {
      "epoch": 1.791337430494586,
      "grad_norm": 0.0003177575417794287,
      "learning_rate": 5.216564237635353e-06,
      "loss": 0.0,
      "step": 6121
    },
    {
      "epoch": 1.7916300848697688,
      "grad_norm": 0.00031799066346138716,
      "learning_rate": 5.2092478782557805e-06,
      "loss": 0.0,
      "step": 6122
    },
    {
      "epoch": 1.7919227392449517,
      "grad_norm": 0.0006663068779744208,
      "learning_rate": 5.201931518876208e-06,
      "loss": 0.0,
      "step": 6123
    },
    {
      "epoch": 1.7922153936201346,
      "grad_norm": 0.001612933585420251,
      "learning_rate": 5.194615159496635e-06,
      "loss": 0.0,
      "step": 6124
    },
    {
      "epoch": 1.7925080479953175,
      "grad_norm": 0.02046128176152706,
      "learning_rate": 5.187298800117062e-06,
      "loss": 0.0001,
      "step": 6125
    },
    {
      "epoch": 1.7928007023705006,
      "grad_norm": 0.00048213082482106984,
      "learning_rate": 5.17998244073749e-06,
      "loss": 0.0,
      "step": 6126
    },
    {
      "epoch": 1.7930933567456835,
      "grad_norm": 0.003854131791740656,
      "learning_rate": 5.172666081357917e-06,
      "loss": 0.0,
      "step": 6127
    },
    {
      "epoch": 1.7933860111208664,
      "grad_norm": 0.0006036604754626751,
      "learning_rate": 5.165349721978344e-06,
      "loss": 0.0,
      "step": 6128
    },
    {
      "epoch": 1.7936786654960493,
      "grad_norm": 0.0008485932485200465,
      "learning_rate": 5.158033362598771e-06,
      "loss": 0.0,
      "step": 6129
    },
    {
      "epoch": 1.7939713198712322,
      "grad_norm": 0.0005633328692056239,
      "learning_rate": 5.150717003219198e-06,
      "loss": 0.0,
      "step": 6130
    },
    {
      "epoch": 1.794263974246415,
      "grad_norm": 0.0004222339775878936,
      "learning_rate": 5.143400643839625e-06,
      "loss": 0.0,
      "step": 6131
    },
    {
      "epoch": 1.794556628621598,
      "grad_norm": 0.0005146698094904423,
      "learning_rate": 5.136084284460052e-06,
      "loss": 0.0,
      "step": 6132
    },
    {
      "epoch": 1.7948492829967808,
      "grad_norm": 0.0013351863017305732,
      "learning_rate": 5.1287679250804795e-06,
      "loss": 0.0,
      "step": 6133
    },
    {
      "epoch": 1.7951419373719637,
      "grad_norm": 0.0003425734757911414,
      "learning_rate": 5.1214515657009074e-06,
      "loss": 0.0,
      "step": 6134
    },
    {
      "epoch": 1.7954345917471466,
      "grad_norm": 0.001160002313554287,
      "learning_rate": 5.1141352063213345e-06,
      "loss": 0.0,
      "step": 6135
    },
    {
      "epoch": 1.7957272461223295,
      "grad_norm": 0.00015163766511250287,
      "learning_rate": 5.106818846941762e-06,
      "loss": 0.0,
      "step": 6136
    },
    {
      "epoch": 1.7960199004975124,
      "grad_norm": 0.000965707004070282,
      "learning_rate": 5.099502487562189e-06,
      "loss": 0.0,
      "step": 6137
    },
    {
      "epoch": 1.7963125548726953,
      "grad_norm": 0.00031617077183909714,
      "learning_rate": 5.092186128182617e-06,
      "loss": 0.0,
      "step": 6138
    },
    {
      "epoch": 1.7966052092478781,
      "grad_norm": 0.0001316484558628872,
      "learning_rate": 5.084869768803044e-06,
      "loss": 0.0,
      "step": 6139
    },
    {
      "epoch": 1.796897863623061,
      "grad_norm": 0.00207359972409904,
      "learning_rate": 5.077553409423471e-06,
      "loss": 0.0,
      "step": 6140
    },
    {
      "epoch": 1.797190517998244,
      "grad_norm": 0.0006536368746310472,
      "learning_rate": 5.070237050043898e-06,
      "loss": 0.0,
      "step": 6141
    },
    {
      "epoch": 1.797483172373427,
      "grad_norm": 0.0003556523297447711,
      "learning_rate": 5.062920690664326e-06,
      "loss": 0.0,
      "step": 6142
    },
    {
      "epoch": 1.79777582674861,
      "grad_norm": 0.012084085494279861,
      "learning_rate": 5.055604331284753e-06,
      "loss": 0.0001,
      "step": 6143
    },
    {
      "epoch": 1.7980684811237928,
      "grad_norm": 0.00036497731343843043,
      "learning_rate": 5.04828797190518e-06,
      "loss": 0.0,
      "step": 6144
    },
    {
      "epoch": 1.7983611354989757,
      "grad_norm": 0.002084647072479129,
      "learning_rate": 5.040971612525607e-06,
      "loss": 0.0,
      "step": 6145
    },
    {
      "epoch": 1.7986537898741586,
      "grad_norm": 0.0014732355484738946,
      "learning_rate": 5.033655253146035e-06,
      "loss": 0.0,
      "step": 6146
    },
    {
      "epoch": 1.7989464442493417,
      "grad_norm": 0.0006228667916730046,
      "learning_rate": 5.026338893766462e-06,
      "loss": 0.0,
      "step": 6147
    },
    {
      "epoch": 1.7992390986245246,
      "grad_norm": 0.0002631722891237587,
      "learning_rate": 5.0190225343868894e-06,
      "loss": 0.0,
      "step": 6148
    },
    {
      "epoch": 1.7995317529997075,
      "grad_norm": 0.00019564389367587864,
      "learning_rate": 5.0117061750073165e-06,
      "loss": 0.0,
      "step": 6149
    },
    {
      "epoch": 1.7998244073748904,
      "grad_norm": 0.9656554460525513,
      "learning_rate": 5.0043898156277445e-06,
      "loss": 0.0066,
      "step": 6150
    },
    {
      "epoch": 1.8001170617500732,
      "grad_norm": 0.001417526276782155,
      "learning_rate": 4.997073456248172e-06,
      "loss": 0.0,
      "step": 6151
    },
    {
      "epoch": 1.8004097161252561,
      "grad_norm": 0.00033955316757783294,
      "learning_rate": 4.989757096868599e-06,
      "loss": 0.0,
      "step": 6152
    },
    {
      "epoch": 1.800702370500439,
      "grad_norm": 0.0006844167364761233,
      "learning_rate": 4.982440737489026e-06,
      "loss": 0.0,
      "step": 6153
    },
    {
      "epoch": 1.800995024875622,
      "grad_norm": 0.0009767315350472927,
      "learning_rate": 4.975124378109453e-06,
      "loss": 0.0,
      "step": 6154
    },
    {
      "epoch": 1.8012876792508048,
      "grad_norm": 0.0001971491874428466,
      "learning_rate": 4.96780801872988e-06,
      "loss": 0.0,
      "step": 6155
    },
    {
      "epoch": 1.8015803336259877,
      "grad_norm": 0.00021163126803003252,
      "learning_rate": 4.960491659350307e-06,
      "loss": 0.0,
      "step": 6156
    },
    {
      "epoch": 1.8018729880011706,
      "grad_norm": 0.003267401596531272,
      "learning_rate": 4.953175299970734e-06,
      "loss": 0.0,
      "step": 6157
    },
    {
      "epoch": 1.8021656423763535,
      "grad_norm": 0.0008824951364658773,
      "learning_rate": 4.945858940591162e-06,
      "loss": 0.0,
      "step": 6158
    },
    {
      "epoch": 1.8024582967515363,
      "grad_norm": 3.2411012649536133,
      "learning_rate": 4.938542581211589e-06,
      "loss": 0.1711,
      "step": 6159
    },
    {
      "epoch": 1.8027509511267192,
      "grad_norm": 0.00016725769091863185,
      "learning_rate": 4.931226221832016e-06,
      "loss": 0.0,
      "step": 6160
    },
    {
      "epoch": 1.8030436055019021,
      "grad_norm": 0.00017268993542529643,
      "learning_rate": 4.9239098624524435e-06,
      "loss": 0.0,
      "step": 6161
    },
    {
      "epoch": 1.803336259877085,
      "grad_norm": 0.001485522836446762,
      "learning_rate": 4.916593503072871e-06,
      "loss": 0.0,
      "step": 6162
    },
    {
      "epoch": 1.8036289142522681,
      "grad_norm": 0.011133319698274136,
      "learning_rate": 4.9092771436932985e-06,
      "loss": 0.0001,
      "step": 6163
    },
    {
      "epoch": 1.803921568627451,
      "grad_norm": 0.0012351942714303732,
      "learning_rate": 4.901960784313726e-06,
      "loss": 0.0,
      "step": 6164
    },
    {
      "epoch": 1.804214223002634,
      "grad_norm": 0.02805311419069767,
      "learning_rate": 4.894644424934153e-06,
      "loss": 0.0001,
      "step": 6165
    },
    {
      "epoch": 1.8045068773778168,
      "grad_norm": 0.0007728440104983747,
      "learning_rate": 4.887328065554581e-06,
      "loss": 0.0,
      "step": 6166
    },
    {
      "epoch": 1.8047995317529997,
      "grad_norm": 0.0007231071358546615,
      "learning_rate": 4.880011706175008e-06,
      "loss": 0.0,
      "step": 6167
    },
    {
      "epoch": 1.8050921861281826,
      "grad_norm": 0.0025570434518158436,
      "learning_rate": 4.872695346795435e-06,
      "loss": 0.0001,
      "step": 6168
    },
    {
      "epoch": 1.8053848405033657,
      "grad_norm": 0.0006787261227145791,
      "learning_rate": 4.865378987415862e-06,
      "loss": 0.0,
      "step": 6169
    },
    {
      "epoch": 1.8056774948785486,
      "grad_norm": 0.0006139308679848909,
      "learning_rate": 4.85806262803629e-06,
      "loss": 0.0,
      "step": 6170
    },
    {
      "epoch": 1.8059701492537314,
      "grad_norm": 0.0007231313502416015,
      "learning_rate": 4.850746268656717e-06,
      "loss": 0.0,
      "step": 6171
    },
    {
      "epoch": 1.8062628036289143,
      "grad_norm": 0.0019035454606637359,
      "learning_rate": 4.843429909277144e-06,
      "loss": 0.0,
      "step": 6172
    },
    {
      "epoch": 1.8065554580040972,
      "grad_norm": 0.00047850527334958315,
      "learning_rate": 4.836113549897571e-06,
      "loss": 0.0,
      "step": 6173
    },
    {
      "epoch": 1.8068481123792801,
      "grad_norm": 0.00034121458884328604,
      "learning_rate": 4.828797190517999e-06,
      "loss": 0.0,
      "step": 6174
    },
    {
      "epoch": 1.807140766754463,
      "grad_norm": 0.01851416565477848,
      "learning_rate": 4.821480831138426e-06,
      "loss": 0.0002,
      "step": 6175
    },
    {
      "epoch": 1.807433421129646,
      "grad_norm": 0.00022216422075871378,
      "learning_rate": 4.814164471758853e-06,
      "loss": 0.0,
      "step": 6176
    },
    {
      "epoch": 1.8077260755048288,
      "grad_norm": 0.0003145903756376356,
      "learning_rate": 4.8068481123792805e-06,
      "loss": 0.0,
      "step": 6177
    },
    {
      "epoch": 1.8080187298800117,
      "grad_norm": 0.0008986227912828326,
      "learning_rate": 4.799531752999708e-06,
      "loss": 0.0,
      "step": 6178
    },
    {
      "epoch": 1.8083113842551946,
      "grad_norm": 0.0014955041697248816,
      "learning_rate": 4.792215393620135e-06,
      "loss": 0.0,
      "step": 6179
    },
    {
      "epoch": 1.8086040386303774,
      "grad_norm": 0.0005902801640331745,
      "learning_rate": 4.784899034240562e-06,
      "loss": 0.0,
      "step": 6180
    },
    {
      "epoch": 1.8088966930055603,
      "grad_norm": 0.0005175388068892062,
      "learning_rate": 4.777582674860989e-06,
      "loss": 0.0,
      "step": 6181
    },
    {
      "epoch": 1.8091893473807432,
      "grad_norm": 0.0007320493459701538,
      "learning_rate": 4.770266315481417e-06,
      "loss": 0.0,
      "step": 6182
    },
    {
      "epoch": 1.809482001755926,
      "grad_norm": 0.0008106476161628962,
      "learning_rate": 4.762949956101844e-06,
      "loss": 0.0,
      "step": 6183
    },
    {
      "epoch": 1.809774656131109,
      "grad_norm": 0.0032793600112199783,
      "learning_rate": 4.755633596722271e-06,
      "loss": 0.0,
      "step": 6184
    },
    {
      "epoch": 1.810067310506292,
      "grad_norm": 0.00037016900023445487,
      "learning_rate": 4.748317237342698e-06,
      "loss": 0.0,
      "step": 6185
    },
    {
      "epoch": 1.810359964881475,
      "grad_norm": 0.0010186518775299191,
      "learning_rate": 4.741000877963126e-06,
      "loss": 0.0,
      "step": 6186
    },
    {
      "epoch": 1.8106526192566579,
      "grad_norm": 0.001128872623667121,
      "learning_rate": 4.733684518583553e-06,
      "loss": 0.0,
      "step": 6187
    },
    {
      "epoch": 1.8109452736318408,
      "grad_norm": 0.0027060238644480705,
      "learning_rate": 4.72636815920398e-06,
      "loss": 0.0,
      "step": 6188
    },
    {
      "epoch": 1.8112379280070237,
      "grad_norm": 0.0008564601885154843,
      "learning_rate": 4.7190517998244074e-06,
      "loss": 0.0,
      "step": 6189
    },
    {
      "epoch": 1.8115305823822068,
      "grad_norm": 0.004802064970135689,
      "learning_rate": 4.7117354404448345e-06,
      "loss": 0.0001,
      "step": 6190
    },
    {
      "epoch": 1.8118232367573897,
      "grad_norm": 0.0005869761225767434,
      "learning_rate": 4.7044190810652625e-06,
      "loss": 0.0,
      "step": 6191
    },
    {
      "epoch": 1.8121158911325725,
      "grad_norm": 0.00042698491597548127,
      "learning_rate": 4.69710272168569e-06,
      "loss": 0.0,
      "step": 6192
    },
    {
      "epoch": 1.8124085455077554,
      "grad_norm": 0.0010814317502081394,
      "learning_rate": 4.689786362306117e-06,
      "loss": 0.0,
      "step": 6193
    },
    {
      "epoch": 1.8127011998829383,
      "grad_norm": 0.000888126203790307,
      "learning_rate": 4.682470002926544e-06,
      "loss": 0.0,
      "step": 6194
    },
    {
      "epoch": 1.8129938542581212,
      "grad_norm": 0.00033576504210941494,
      "learning_rate": 4.675153643546972e-06,
      "loss": 0.0,
      "step": 6195
    },
    {
      "epoch": 1.813286508633304,
      "grad_norm": 0.00024507715716026723,
      "learning_rate": 4.667837284167399e-06,
      "loss": 0.0,
      "step": 6196
    },
    {
      "epoch": 1.813579163008487,
      "grad_norm": 0.000262171815847978,
      "learning_rate": 4.660520924787826e-06,
      "loss": 0.0,
      "step": 6197
    },
    {
      "epoch": 1.8138718173836699,
      "grad_norm": 0.00517128873616457,
      "learning_rate": 4.653204565408253e-06,
      "loss": 0.0,
      "step": 6198
    },
    {
      "epoch": 1.8141644717588528,
      "grad_norm": 0.00016362473252229393,
      "learning_rate": 4.64588820602868e-06,
      "loss": 0.0,
      "step": 6199
    },
    {
      "epoch": 1.8144571261340356,
      "grad_norm": 0.0005344537785276771,
      "learning_rate": 4.638571846649107e-06,
      "loss": 0.0,
      "step": 6200
    },
    {
      "epoch": 1.8147497805092185,
      "grad_norm": 0.01572296768426895,
      "learning_rate": 4.631255487269534e-06,
      "loss": 0.0001,
      "step": 6201
    },
    {
      "epoch": 1.8150424348844014,
      "grad_norm": 0.0012269177241250873,
      "learning_rate": 4.6239391278899615e-06,
      "loss": 0.0,
      "step": 6202
    },
    {
      "epoch": 1.8153350892595843,
      "grad_norm": 0.00042726314859464765,
      "learning_rate": 4.6166227685103894e-06,
      "loss": 0.0,
      "step": 6203
    },
    {
      "epoch": 1.8156277436347672,
      "grad_norm": 0.0005853824550285935,
      "learning_rate": 4.6093064091308165e-06,
      "loss": 0.0,
      "step": 6204
    },
    {
      "epoch": 1.81592039800995,
      "grad_norm": 0.0008499994874000549,
      "learning_rate": 4.601990049751244e-06,
      "loss": 0.0,
      "step": 6205
    },
    {
      "epoch": 1.8162130523851332,
      "grad_norm": 0.0009012450464069843,
      "learning_rate": 4.594673690371671e-06,
      "loss": 0.0,
      "step": 6206
    },
    {
      "epoch": 1.816505706760316,
      "grad_norm": 0.00019460823386907578,
      "learning_rate": 4.587357330992099e-06,
      "loss": 0.0,
      "step": 6207
    },
    {
      "epoch": 1.816798361135499,
      "grad_norm": 0.0010536338668316603,
      "learning_rate": 4.580040971612526e-06,
      "loss": 0.0,
      "step": 6208
    },
    {
      "epoch": 1.8170910155106819,
      "grad_norm": 0.0004098449135199189,
      "learning_rate": 4.572724612232953e-06,
      "loss": 0.0,
      "step": 6209
    },
    {
      "epoch": 1.8173836698858647,
      "grad_norm": 0.0020609141793102026,
      "learning_rate": 4.56540825285338e-06,
      "loss": 0.0,
      "step": 6210
    },
    {
      "epoch": 1.8176763242610479,
      "grad_norm": 0.0004708924680016935,
      "learning_rate": 4.558091893473808e-06,
      "loss": 0.0,
      "step": 6211
    },
    {
      "epoch": 1.8179689786362307,
      "grad_norm": 0.00048562430310994387,
      "learning_rate": 4.550775534094235e-06,
      "loss": 0.0,
      "step": 6212
    },
    {
      "epoch": 1.8182616330114136,
      "grad_norm": 0.00024222530191764235,
      "learning_rate": 4.543459174714662e-06,
      "loss": 0.0,
      "step": 6213
    },
    {
      "epoch": 1.8185542873865965,
      "grad_norm": 0.0023239348083734512,
      "learning_rate": 4.536142815335089e-06,
      "loss": 0.0,
      "step": 6214
    },
    {
      "epoch": 1.8188469417617794,
      "grad_norm": 0.0003518529119901359,
      "learning_rate": 4.528826455955517e-06,
      "loss": 0.0,
      "step": 6215
    },
    {
      "epoch": 1.8191395961369623,
      "grad_norm": 0.001817098120227456,
      "learning_rate": 4.521510096575944e-06,
      "loss": 0.0,
      "step": 6216
    },
    {
      "epoch": 1.8194322505121452,
      "grad_norm": 0.0005887098959647119,
      "learning_rate": 4.514193737196371e-06,
      "loss": 0.0,
      "step": 6217
    },
    {
      "epoch": 1.819724904887328,
      "grad_norm": 0.003914620261639357,
      "learning_rate": 4.5068773778167985e-06,
      "loss": 0.0001,
      "step": 6218
    },
    {
      "epoch": 1.820017559262511,
      "grad_norm": 0.0035934834741055965,
      "learning_rate": 4.4995610184372265e-06,
      "loss": 0.0,
      "step": 6219
    },
    {
      "epoch": 1.8203102136376939,
      "grad_norm": 0.0004365076601970941,
      "learning_rate": 4.4922446590576536e-06,
      "loss": 0.0,
      "step": 6220
    },
    {
      "epoch": 1.8206028680128767,
      "grad_norm": 0.00044464407255873084,
      "learning_rate": 4.484928299678081e-06,
      "loss": 0.0,
      "step": 6221
    },
    {
      "epoch": 1.8208955223880596,
      "grad_norm": 0.0006023648893460631,
      "learning_rate": 4.477611940298508e-06,
      "loss": 0.0,
      "step": 6222
    },
    {
      "epoch": 1.8211881767632425,
      "grad_norm": 0.01346750371158123,
      "learning_rate": 4.470295580918935e-06,
      "loss": 0.0001,
      "step": 6223
    },
    {
      "epoch": 1.8214808311384254,
      "grad_norm": 0.00067456642864272,
      "learning_rate": 4.462979221539362e-06,
      "loss": 0.0,
      "step": 6224
    },
    {
      "epoch": 1.8217734855136083,
      "grad_norm": 0.0004930745344609022,
      "learning_rate": 4.455662862159789e-06,
      "loss": 0.0,
      "step": 6225
    },
    {
      "epoch": 1.8220661398887912,
      "grad_norm": 0.0006139373872429132,
      "learning_rate": 4.448346502780216e-06,
      "loss": 0.0,
      "step": 6226
    },
    {
      "epoch": 1.8223587942639743,
      "grad_norm": 0.000917270896025002,
      "learning_rate": 4.441030143400644e-06,
      "loss": 0.0,
      "step": 6227
    },
    {
      "epoch": 1.8226514486391572,
      "grad_norm": 0.00100823980756104,
      "learning_rate": 4.433713784021071e-06,
      "loss": 0.0,
      "step": 6228
    },
    {
      "epoch": 1.82294410301434,
      "grad_norm": 0.0018652203725650907,
      "learning_rate": 4.426397424641498e-06,
      "loss": 0.0,
      "step": 6229
    },
    {
      "epoch": 1.823236757389523,
      "grad_norm": 0.0015184390358626842,
      "learning_rate": 4.4190810652619255e-06,
      "loss": 0.0,
      "step": 6230
    },
    {
      "epoch": 1.8235294117647058,
      "grad_norm": 0.0004591936303768307,
      "learning_rate": 4.411764705882353e-06,
      "loss": 0.0,
      "step": 6231
    },
    {
      "epoch": 1.823822066139889,
      "grad_norm": 0.0018184625077992678,
      "learning_rate": 4.4044483465027805e-06,
      "loss": 0.0,
      "step": 6232
    },
    {
      "epoch": 1.8241147205150718,
      "grad_norm": 0.001610770937986672,
      "learning_rate": 4.397131987123208e-06,
      "loss": 0.0,
      "step": 6233
    },
    {
      "epoch": 1.8244073748902547,
      "grad_norm": 0.0005263009225018322,
      "learning_rate": 4.389815627743635e-06,
      "loss": 0.0,
      "step": 6234
    },
    {
      "epoch": 1.8247000292654376,
      "grad_norm": 0.0009672120795585215,
      "learning_rate": 4.382499268364063e-06,
      "loss": 0.0,
      "step": 6235
    },
    {
      "epoch": 1.8249926836406205,
      "grad_norm": 0.0011546674650162458,
      "learning_rate": 4.37518290898449e-06,
      "loss": 0.0,
      "step": 6236
    },
    {
      "epoch": 1.8252853380158034,
      "grad_norm": 0.0345073826611042,
      "learning_rate": 4.367866549604917e-06,
      "loss": 0.0002,
      "step": 6237
    },
    {
      "epoch": 1.8255779923909863,
      "grad_norm": 0.001559751806780696,
      "learning_rate": 4.360550190225344e-06,
      "loss": 0.0,
      "step": 6238
    },
    {
      "epoch": 1.8258706467661692,
      "grad_norm": 0.0009903997415676713,
      "learning_rate": 4.353233830845772e-06,
      "loss": 0.0,
      "step": 6239
    },
    {
      "epoch": 1.826163301141352,
      "grad_norm": 0.0005342111689969897,
      "learning_rate": 4.345917471466199e-06,
      "loss": 0.0,
      "step": 6240
    },
    {
      "epoch": 1.826455955516535,
      "grad_norm": 0.00039438612293452024,
      "learning_rate": 4.338601112086626e-06,
      "loss": 0.0,
      "step": 6241
    },
    {
      "epoch": 1.8267486098917178,
      "grad_norm": 0.0005523980362340808,
      "learning_rate": 4.331284752707053e-06,
      "loss": 0.0,
      "step": 6242
    },
    {
      "epoch": 1.8270412642669007,
      "grad_norm": 0.0013923441292718053,
      "learning_rate": 4.323968393327481e-06,
      "loss": 0.0,
      "step": 6243
    },
    {
      "epoch": 1.8273339186420836,
      "grad_norm": 0.0006046186317689717,
      "learning_rate": 4.316652033947908e-06,
      "loss": 0.0,
      "step": 6244
    },
    {
      "epoch": 1.8276265730172665,
      "grad_norm": 0.005008961074054241,
      "learning_rate": 4.309335674568335e-06,
      "loss": 0.0001,
      "step": 6245
    },
    {
      "epoch": 1.8279192273924494,
      "grad_norm": 0.0005424632108770311,
      "learning_rate": 4.3020193151887625e-06,
      "loss": 0.0,
      "step": 6246
    },
    {
      "epoch": 1.8282118817676323,
      "grad_norm": 0.0008885089191608131,
      "learning_rate": 4.29470295580919e-06,
      "loss": 0.0,
      "step": 6247
    },
    {
      "epoch": 1.8285045361428154,
      "grad_norm": 0.0007215089281089604,
      "learning_rate": 4.287386596429617e-06,
      "loss": 0.0,
      "step": 6248
    },
    {
      "epoch": 1.8287971905179983,
      "grad_norm": 0.0020264852792024612,
      "learning_rate": 4.280070237050044e-06,
      "loss": 0.0,
      "step": 6249
    },
    {
      "epoch": 1.8290898448931812,
      "grad_norm": 0.0008645343477837741,
      "learning_rate": 4.272753877670471e-06,
      "loss": 0.0,
      "step": 6250
    },
    {
      "epoch": 1.829382499268364,
      "grad_norm": 0.00043376587564125657,
      "learning_rate": 4.265437518290899e-06,
      "loss": 0.0,
      "step": 6251
    },
    {
      "epoch": 1.829675153643547,
      "grad_norm": 0.0011695638531818986,
      "learning_rate": 4.258121158911326e-06,
      "loss": 0.0,
      "step": 6252
    },
    {
      "epoch": 1.8299678080187298,
      "grad_norm": 0.00021304836263880134,
      "learning_rate": 4.250804799531753e-06,
      "loss": 0.0,
      "step": 6253
    },
    {
      "epoch": 1.830260462393913,
      "grad_norm": 0.0005459172534756362,
      "learning_rate": 4.24348844015218e-06,
      "loss": 0.0,
      "step": 6254
    },
    {
      "epoch": 1.8305531167690958,
      "grad_norm": 0.0005447863368317485,
      "learning_rate": 4.236172080772607e-06,
      "loss": 0.0,
      "step": 6255
    },
    {
      "epoch": 1.8308457711442787,
      "grad_norm": 0.0007374795968644321,
      "learning_rate": 4.228855721393035e-06,
      "loss": 0.0,
      "step": 6256
    },
    {
      "epoch": 1.8311384255194616,
      "grad_norm": 0.004098342731595039,
      "learning_rate": 4.221539362013462e-06,
      "loss": 0.0001,
      "step": 6257
    },
    {
      "epoch": 1.8314310798946445,
      "grad_norm": 0.00016194079944398254,
      "learning_rate": 4.2142230026338894e-06,
      "loss": 0.0,
      "step": 6258
    },
    {
      "epoch": 1.8317237342698274,
      "grad_norm": 0.0026144194416701794,
      "learning_rate": 4.2069066432543165e-06,
      "loss": 0.0,
      "step": 6259
    },
    {
      "epoch": 1.8320163886450103,
      "grad_norm": 1.628932237625122,
      "learning_rate": 4.1995902838747445e-06,
      "loss": 0.0105,
      "step": 6260
    },
    {
      "epoch": 1.8323090430201932,
      "grad_norm": 0.00035455342731438577,
      "learning_rate": 4.192273924495172e-06,
      "loss": 0.0,
      "step": 6261
    },
    {
      "epoch": 1.832601697395376,
      "grad_norm": 0.0003120841574855149,
      "learning_rate": 4.184957565115599e-06,
      "loss": 0.0,
      "step": 6262
    },
    {
      "epoch": 1.832894351770559,
      "grad_norm": 0.001973384525626898,
      "learning_rate": 4.177641205736026e-06,
      "loss": 0.0,
      "step": 6263
    },
    {
      "epoch": 1.8331870061457418,
      "grad_norm": 0.00034848632640205324,
      "learning_rate": 4.170324846356454e-06,
      "loss": 0.0,
      "step": 6264
    },
    {
      "epoch": 1.8334796605209247,
      "grad_norm": 0.0014513269998133183,
      "learning_rate": 4.163008486976881e-06,
      "loss": 0.0,
      "step": 6265
    },
    {
      "epoch": 1.8337723148961076,
      "grad_norm": 0.0016633718041703105,
      "learning_rate": 4.155692127597308e-06,
      "loss": 0.0,
      "step": 6266
    },
    {
      "epoch": 1.8340649692712905,
      "grad_norm": 0.0005357158370316029,
      "learning_rate": 4.148375768217735e-06,
      "loss": 0.0,
      "step": 6267
    },
    {
      "epoch": 1.8343576236464734,
      "grad_norm": 0.0016632014885544777,
      "learning_rate": 4.141059408838162e-06,
      "loss": 0.0,
      "step": 6268
    },
    {
      "epoch": 1.8346502780216565,
      "grad_norm": 0.0007505984976887703,
      "learning_rate": 4.133743049458589e-06,
      "loss": 0.0,
      "step": 6269
    },
    {
      "epoch": 1.8349429323968394,
      "grad_norm": 0.00036485426244325936,
      "learning_rate": 4.126426690079016e-06,
      "loss": 0.0,
      "step": 6270
    },
    {
      "epoch": 1.8352355867720223,
      "grad_norm": 0.006879924796521664,
      "learning_rate": 4.1191103306994435e-06,
      "loss": 0.0001,
      "step": 6271
    },
    {
      "epoch": 1.8355282411472051,
      "grad_norm": 0.0015742400428280234,
      "learning_rate": 4.111793971319871e-06,
      "loss": 0.0,
      "step": 6272
    },
    {
      "epoch": 1.835820895522388,
      "grad_norm": 0.0005280297482386231,
      "learning_rate": 4.1044776119402985e-06,
      "loss": 0.0,
      "step": 6273
    },
    {
      "epoch": 1.836113549897571,
      "grad_norm": 0.0004075948672834784,
      "learning_rate": 4.097161252560726e-06,
      "loss": 0.0,
      "step": 6274
    },
    {
      "epoch": 1.836406204272754,
      "grad_norm": 0.0004716550756711513,
      "learning_rate": 4.089844893181153e-06,
      "loss": 0.0,
      "step": 6275
    },
    {
      "epoch": 1.836698858647937,
      "grad_norm": 0.008090038783848286,
      "learning_rate": 4.082528533801581e-06,
      "loss": 0.0001,
      "step": 6276
    },
    {
      "epoch": 1.8369915130231198,
      "grad_norm": 0.00023774802684783936,
      "learning_rate": 4.075212174422008e-06,
      "loss": 0.0,
      "step": 6277
    },
    {
      "epoch": 1.8372841673983027,
      "grad_norm": 0.002482503652572632,
      "learning_rate": 4.067895815042435e-06,
      "loss": 0.0,
      "step": 6278
    },
    {
      "epoch": 1.8375768217734856,
      "grad_norm": 0.0006763905403204262,
      "learning_rate": 4.060579455662862e-06,
      "loss": 0.0,
      "step": 6279
    },
    {
      "epoch": 1.8378694761486685,
      "grad_norm": 0.00540641276165843,
      "learning_rate": 4.05326309628329e-06,
      "loss": 0.0001,
      "step": 6280
    },
    {
      "epoch": 1.8381621305238514,
      "grad_norm": 0.0001323170290561393,
      "learning_rate": 4.045946736903717e-06,
      "loss": 0.0,
      "step": 6281
    },
    {
      "epoch": 1.8384547848990342,
      "grad_norm": 0.0015577462036162615,
      "learning_rate": 4.038630377524144e-06,
      "loss": 0.0,
      "step": 6282
    },
    {
      "epoch": 1.8387474392742171,
      "grad_norm": 0.0008135398966260254,
      "learning_rate": 4.031314018144571e-06,
      "loss": 0.0,
      "step": 6283
    },
    {
      "epoch": 1.8390400936494,
      "grad_norm": 0.0004831902333535254,
      "learning_rate": 4.023997658764999e-06,
      "loss": 0.0,
      "step": 6284
    },
    {
      "epoch": 1.839332748024583,
      "grad_norm": 0.003544786712154746,
      "learning_rate": 4.016681299385426e-06,
      "loss": 0.0,
      "step": 6285
    },
    {
      "epoch": 1.8396254023997658,
      "grad_norm": 0.0009445445029996336,
      "learning_rate": 4.009364940005853e-06,
      "loss": 0.0,
      "step": 6286
    },
    {
      "epoch": 1.8399180567749487,
      "grad_norm": 0.0035348988603800535,
      "learning_rate": 4.0020485806262805e-06,
      "loss": 0.0,
      "step": 6287
    },
    {
      "epoch": 1.8402107111501316,
      "grad_norm": 0.0016584821278229356,
      "learning_rate": 3.9947322212467085e-06,
      "loss": 0.0,
      "step": 6288
    },
    {
      "epoch": 1.8405033655253145,
      "grad_norm": 0.00039671818376518786,
      "learning_rate": 3.9874158618671356e-06,
      "loss": 0.0,
      "step": 6289
    },
    {
      "epoch": 1.8407960199004973,
      "grad_norm": 0.0004341302264947444,
      "learning_rate": 3.980099502487563e-06,
      "loss": 0.0,
      "step": 6290
    },
    {
      "epoch": 1.8410886742756805,
      "grad_norm": 0.002159208757802844,
      "learning_rate": 3.97278314310799e-06,
      "loss": 0.0,
      "step": 6291
    },
    {
      "epoch": 1.8413813286508633,
      "grad_norm": 0.0012557889567688107,
      "learning_rate": 3.965466783728417e-06,
      "loss": 0.0,
      "step": 6292
    },
    {
      "epoch": 1.8416739830260462,
      "grad_norm": 0.0004266448086127639,
      "learning_rate": 3.958150424348844e-06,
      "loss": 0.0,
      "step": 6293
    },
    {
      "epoch": 1.8419666374012291,
      "grad_norm": 0.00016384995251428336,
      "learning_rate": 3.950834064969271e-06,
      "loss": 0.0,
      "step": 6294
    },
    {
      "epoch": 1.842259291776412,
      "grad_norm": 0.00015025155153125525,
      "learning_rate": 3.943517705589698e-06,
      "loss": 0.0,
      "step": 6295
    },
    {
      "epoch": 1.8425519461515951,
      "grad_norm": 9.3340482711792,
      "learning_rate": 3.936201346210126e-06,
      "loss": 0.1112,
      "step": 6296
    },
    {
      "epoch": 1.842844600526778,
      "grad_norm": 0.0023394993040710688,
      "learning_rate": 3.928884986830553e-06,
      "loss": 0.0,
      "step": 6297
    },
    {
      "epoch": 1.843137254901961,
      "grad_norm": 0.000579311337787658,
      "learning_rate": 3.92156862745098e-06,
      "loss": 0.0,
      "step": 6298
    },
    {
      "epoch": 1.8434299092771438,
      "grad_norm": 0.0004900764324702322,
      "learning_rate": 3.9142522680714074e-06,
      "loss": 0.0,
      "step": 6299
    },
    {
      "epoch": 1.8437225636523267,
      "grad_norm": 0.0003009681240655482,
      "learning_rate": 3.906935908691835e-06,
      "loss": 0.0,
      "step": 6300
    },
    {
      "epoch": 1.8440152180275096,
      "grad_norm": 0.0026180697605013847,
      "learning_rate": 3.8996195493122625e-06,
      "loss": 0.0,
      "step": 6301
    },
    {
      "epoch": 1.8443078724026924,
      "grad_norm": 0.004241331480443478,
      "learning_rate": 3.89230318993269e-06,
      "loss": 0.0001,
      "step": 6302
    },
    {
      "epoch": 1.8446005267778753,
      "grad_norm": 0.0010259848786517978,
      "learning_rate": 3.884986830553117e-06,
      "loss": 0.0,
      "step": 6303
    },
    {
      "epoch": 1.8448931811530582,
      "grad_norm": 23.828861236572266,
      "learning_rate": 3.877670471173545e-06,
      "loss": 0.0556,
      "step": 6304
    },
    {
      "epoch": 1.8451858355282411,
      "grad_norm": 0.00034048352972604334,
      "learning_rate": 3.870354111793972e-06,
      "loss": 0.0,
      "step": 6305
    },
    {
      "epoch": 1.845478489903424,
      "grad_norm": 0.0010296351974830031,
      "learning_rate": 3.863037752414399e-06,
      "loss": 0.0,
      "step": 6306
    },
    {
      "epoch": 1.845771144278607,
      "grad_norm": 0.0013108521234244108,
      "learning_rate": 3.855721393034826e-06,
      "loss": 0.0,
      "step": 6307
    },
    {
      "epoch": 1.8460637986537898,
      "grad_norm": 0.0008955459343269467,
      "learning_rate": 3.848405033655254e-06,
      "loss": 0.0,
      "step": 6308
    },
    {
      "epoch": 1.8463564530289727,
      "grad_norm": 0.000490850827191025,
      "learning_rate": 3.841088674275681e-06,
      "loss": 0.0,
      "step": 6309
    },
    {
      "epoch": 1.8466491074041556,
      "grad_norm": 0.0005940706469118595,
      "learning_rate": 3.833772314896108e-06,
      "loss": 0.0,
      "step": 6310
    },
    {
      "epoch": 1.8469417617793384,
      "grad_norm": 0.0010513426968827844,
      "learning_rate": 3.826455955516535e-06,
      "loss": 0.0,
      "step": 6311
    },
    {
      "epoch": 1.8472344161545216,
      "grad_norm": 0.0002777844783850014,
      "learning_rate": 3.819139596136963e-06,
      "loss": 0.0,
      "step": 6312
    },
    {
      "epoch": 1.8475270705297044,
      "grad_norm": 0.0004163272387813777,
      "learning_rate": 3.81182323675739e-06,
      "loss": 0.0,
      "step": 6313
    },
    {
      "epoch": 1.8478197249048873,
      "grad_norm": 0.008874928578734398,
      "learning_rate": 3.804506877377817e-06,
      "loss": 0.0001,
      "step": 6314
    },
    {
      "epoch": 1.8481123792800702,
      "grad_norm": 0.0009299430530518293,
      "learning_rate": 3.797190517998244e-06,
      "loss": 0.0,
      "step": 6315
    },
    {
      "epoch": 1.848405033655253,
      "grad_norm": 0.0005185367190279067,
      "learning_rate": 3.789874158618672e-06,
      "loss": 0.0,
      "step": 6316
    },
    {
      "epoch": 1.8486976880304362,
      "grad_norm": 0.0009174785809591413,
      "learning_rate": 3.782557799239099e-06,
      "loss": 0.0,
      "step": 6317
    },
    {
      "epoch": 1.848990342405619,
      "grad_norm": 0.0004378536541480571,
      "learning_rate": 3.775241439859526e-06,
      "loss": 0.0,
      "step": 6318
    },
    {
      "epoch": 1.849282996780802,
      "grad_norm": 0.000477397843496874,
      "learning_rate": 3.7679250804799533e-06,
      "loss": 0.0,
      "step": 6319
    },
    {
      "epoch": 1.8495756511559849,
      "grad_norm": 0.002084932057186961,
      "learning_rate": 3.7606087211003804e-06,
      "loss": 0.0,
      "step": 6320
    },
    {
      "epoch": 1.8498683055311678,
      "grad_norm": 0.00024068816856015474,
      "learning_rate": 3.753292361720808e-06,
      "loss": 0.0,
      "step": 6321
    },
    {
      "epoch": 1.8501609599063507,
      "grad_norm": 0.002064603613689542,
      "learning_rate": 3.745976002341235e-06,
      "loss": 0.0,
      "step": 6322
    },
    {
      "epoch": 1.8504536142815335,
      "grad_norm": 8.975202945293859e-05,
      "learning_rate": 3.738659642961662e-06,
      "loss": 0.0,
      "step": 6323
    },
    {
      "epoch": 1.8507462686567164,
      "grad_norm": 0.000657002383377403,
      "learning_rate": 3.7313432835820893e-06,
      "loss": 0.0,
      "step": 6324
    },
    {
      "epoch": 1.8510389230318993,
      "grad_norm": 0.0009384411969222128,
      "learning_rate": 3.724026924202517e-06,
      "loss": 0.0,
      "step": 6325
    },
    {
      "epoch": 1.8513315774070822,
      "grad_norm": 0.00032740531605668366,
      "learning_rate": 3.7167105648229443e-06,
      "loss": 0.0,
      "step": 6326
    },
    {
      "epoch": 1.851624231782265,
      "grad_norm": 7.135482883313671e-05,
      "learning_rate": 3.7093942054433714e-06,
      "loss": 0.0,
      "step": 6327
    },
    {
      "epoch": 1.851916886157448,
      "grad_norm": 0.0014389071147888899,
      "learning_rate": 3.7020778460637985e-06,
      "loss": 0.0,
      "step": 6328
    },
    {
      "epoch": 1.8522095405326309,
      "grad_norm": 0.00794234685599804,
      "learning_rate": 3.6947614866842265e-06,
      "loss": 0.0,
      "step": 6329
    },
    {
      "epoch": 1.8525021949078138,
      "grad_norm": 0.0008657536236569285,
      "learning_rate": 3.6874451273046536e-06,
      "loss": 0.0,
      "step": 6330
    },
    {
      "epoch": 1.8527948492829966,
      "grad_norm": 0.0003155196609441191,
      "learning_rate": 3.6801287679250807e-06,
      "loss": 0.0,
      "step": 6331
    },
    {
      "epoch": 1.8530875036581795,
      "grad_norm": 0.0004522484668996185,
      "learning_rate": 3.6728124085455078e-06,
      "loss": 0.0,
      "step": 6332
    },
    {
      "epoch": 1.8533801580333626,
      "grad_norm": 0.0005152150406502187,
      "learning_rate": 3.6654960491659353e-06,
      "loss": 0.0,
      "step": 6333
    },
    {
      "epoch": 1.8536728124085455,
      "grad_norm": 0.0018022602889686823,
      "learning_rate": 3.6581796897863624e-06,
      "loss": 0.0,
      "step": 6334
    },
    {
      "epoch": 1.8539654667837284,
      "grad_norm": 0.0011118188267573714,
      "learning_rate": 3.6508633304067895e-06,
      "loss": 0.0,
      "step": 6335
    },
    {
      "epoch": 1.8542581211589113,
      "grad_norm": 0.10409577935934067,
      "learning_rate": 3.6435469710272166e-06,
      "loss": 0.0004,
      "step": 6336
    },
    {
      "epoch": 1.8545507755340942,
      "grad_norm": 0.0006436710827983916,
      "learning_rate": 3.6362306116476446e-06,
      "loss": 0.0,
      "step": 6337
    },
    {
      "epoch": 1.8548434299092773,
      "grad_norm": 0.0019154376350343227,
      "learning_rate": 3.6289142522680717e-06,
      "loss": 0.0,
      "step": 6338
    },
    {
      "epoch": 1.8551360842844602,
      "grad_norm": 0.0009707739809527993,
      "learning_rate": 3.6215978928884988e-06,
      "loss": 0.0,
      "step": 6339
    },
    {
      "epoch": 1.855428738659643,
      "grad_norm": 0.002355606062337756,
      "learning_rate": 3.614281533508926e-06,
      "loss": 0.0,
      "step": 6340
    },
    {
      "epoch": 1.855721393034826,
      "grad_norm": 0.0009587014792487025,
      "learning_rate": 3.606965174129354e-06,
      "loss": 0.0,
      "step": 6341
    },
    {
      "epoch": 1.8560140474100089,
      "grad_norm": 0.0001421875786036253,
      "learning_rate": 3.599648814749781e-06,
      "loss": 0.0,
      "step": 6342
    },
    {
      "epoch": 1.8563067017851917,
      "grad_norm": 0.0029366235248744488,
      "learning_rate": 3.592332455370208e-06,
      "loss": 0.0,
      "step": 6343
    },
    {
      "epoch": 1.8565993561603746,
      "grad_norm": 0.00022222657571546733,
      "learning_rate": 3.585016095990635e-06,
      "loss": 0.0,
      "step": 6344
    },
    {
      "epoch": 1.8568920105355575,
      "grad_norm": 0.0001300495641771704,
      "learning_rate": 3.5776997366110627e-06,
      "loss": 0.0,
      "step": 6345
    },
    {
      "epoch": 1.8571846649107404,
      "grad_norm": 0.00048738211626186967,
      "learning_rate": 3.5703833772314898e-06,
      "loss": 0.0,
      "step": 6346
    },
    {
      "epoch": 1.8574773192859233,
      "grad_norm": 0.0005268138484098017,
      "learning_rate": 3.563067017851917e-06,
      "loss": 0.0,
      "step": 6347
    },
    {
      "epoch": 1.8577699736611062,
      "grad_norm": 0.0016816152492538095,
      "learning_rate": 3.555750658472344e-06,
      "loss": 0.0,
      "step": 6348
    },
    {
      "epoch": 1.858062628036289,
      "grad_norm": 0.0009939387673512101,
      "learning_rate": 3.548434299092772e-06,
      "loss": 0.0,
      "step": 6349
    },
    {
      "epoch": 1.858355282411472,
      "grad_norm": 0.0012612274149432778,
      "learning_rate": 3.541117939713199e-06,
      "loss": 0.0,
      "step": 6350
    },
    {
      "epoch": 1.8586479367866549,
      "grad_norm": 0.020176591351628304,
      "learning_rate": 3.533801580333626e-06,
      "loss": 0.0001,
      "step": 6351
    },
    {
      "epoch": 1.8589405911618377,
      "grad_norm": 0.0012354014907032251,
      "learning_rate": 3.5264852209540532e-06,
      "loss": 0.0,
      "step": 6352
    },
    {
      "epoch": 1.8592332455370206,
      "grad_norm": 0.0006875548860989511,
      "learning_rate": 3.519168861574481e-06,
      "loss": 0.0,
      "step": 6353
    },
    {
      "epoch": 1.8595258999122037,
      "grad_norm": 0.00011707415251294151,
      "learning_rate": 3.5118525021949083e-06,
      "loss": 0.0,
      "step": 6354
    },
    {
      "epoch": 1.8598185542873866,
      "grad_norm": 0.00039305599057115614,
      "learning_rate": 3.5045361428153354e-06,
      "loss": 0.0,
      "step": 6355
    },
    {
      "epoch": 1.8601112086625695,
      "grad_norm": 0.019264813512563705,
      "learning_rate": 3.4972197834357625e-06,
      "loss": 0.0003,
      "step": 6356
    },
    {
      "epoch": 1.8604038630377524,
      "grad_norm": 0.0008811648585833609,
      "learning_rate": 3.48990342405619e-06,
      "loss": 0.0,
      "step": 6357
    },
    {
      "epoch": 1.8606965174129353,
      "grad_norm": 0.00041657642577774823,
      "learning_rate": 3.482587064676617e-06,
      "loss": 0.0,
      "step": 6358
    },
    {
      "epoch": 1.8609891717881182,
      "grad_norm": 0.0003095753490924835,
      "learning_rate": 3.4752707052970442e-06,
      "loss": 0.0,
      "step": 6359
    },
    {
      "epoch": 1.8612818261633013,
      "grad_norm": 0.0004555534978862852,
      "learning_rate": 3.4679543459174713e-06,
      "loss": 0.0,
      "step": 6360
    },
    {
      "epoch": 1.8615744805384842,
      "grad_norm": 0.0005622628377750516,
      "learning_rate": 3.4606379865378993e-06,
      "loss": 0.0,
      "step": 6361
    },
    {
      "epoch": 1.861867134913667,
      "grad_norm": 0.0009711002348922193,
      "learning_rate": 3.4533216271583264e-06,
      "loss": 0.0,
      "step": 6362
    },
    {
      "epoch": 1.86215978928885,
      "grad_norm": 0.00030452609644271433,
      "learning_rate": 3.4460052677787535e-06,
      "loss": 0.0,
      "step": 6363
    },
    {
      "epoch": 1.8624524436640328,
      "grad_norm": 0.0008452314650639892,
      "learning_rate": 3.4386889083991806e-06,
      "loss": 0.0,
      "step": 6364
    },
    {
      "epoch": 1.8627450980392157,
      "grad_norm": 0.000371145608369261,
      "learning_rate": 3.431372549019608e-06,
      "loss": 0.0,
      "step": 6365
    },
    {
      "epoch": 1.8630377524143986,
      "grad_norm": 0.00022421056928578764,
      "learning_rate": 3.424056189640035e-06,
      "loss": 0.0,
      "step": 6366
    },
    {
      "epoch": 1.8633304067895815,
      "grad_norm": 0.001361420494504273,
      "learning_rate": 3.4167398302604623e-06,
      "loss": 0.0,
      "step": 6367
    },
    {
      "epoch": 1.8636230611647644,
      "grad_norm": 0.0004811399267055094,
      "learning_rate": 3.4094234708808894e-06,
      "loss": 0.0,
      "step": 6368
    },
    {
      "epoch": 1.8639157155399473,
      "grad_norm": 0.0016553618479520082,
      "learning_rate": 3.4021071115013174e-06,
      "loss": 0.0,
      "step": 6369
    },
    {
      "epoch": 1.8642083699151302,
      "grad_norm": 0.0007354956469498575,
      "learning_rate": 3.3947907521217445e-06,
      "loss": 0.0,
      "step": 6370
    },
    {
      "epoch": 1.864501024290313,
      "grad_norm": 0.0010148129658773541,
      "learning_rate": 3.3874743927421716e-06,
      "loss": 0.0,
      "step": 6371
    },
    {
      "epoch": 1.864793678665496,
      "grad_norm": 0.0008505440782755613,
      "learning_rate": 3.3801580333625987e-06,
      "loss": 0.0,
      "step": 6372
    },
    {
      "epoch": 1.8650863330406788,
      "grad_norm": 0.004304022528231144,
      "learning_rate": 3.3728416739830266e-06,
      "loss": 0.0001,
      "step": 6373
    },
    {
      "epoch": 1.8653789874158617,
      "grad_norm": 0.0010357032297179103,
      "learning_rate": 3.3655253146034537e-06,
      "loss": 0.0,
      "step": 6374
    },
    {
      "epoch": 1.8656716417910446,
      "grad_norm": 0.0010642132256180048,
      "learning_rate": 3.358208955223881e-06,
      "loss": 0.0,
      "step": 6375
    },
    {
      "epoch": 1.8659642961662277,
      "grad_norm": 0.00038587921881116927,
      "learning_rate": 3.350892595844308e-06,
      "loss": 0.0,
      "step": 6376
    },
    {
      "epoch": 1.8662569505414106,
      "grad_norm": 0.0003773169592022896,
      "learning_rate": 3.3435762364647355e-06,
      "loss": 0.0,
      "step": 6377
    },
    {
      "epoch": 1.8665496049165935,
      "grad_norm": 0.000283686415059492,
      "learning_rate": 3.3362598770851626e-06,
      "loss": 0.0,
      "step": 6378
    },
    {
      "epoch": 1.8668422592917764,
      "grad_norm": 0.00034093321301043034,
      "learning_rate": 3.3289435177055897e-06,
      "loss": 0.0,
      "step": 6379
    },
    {
      "epoch": 1.8671349136669593,
      "grad_norm": 0.0007197193335741758,
      "learning_rate": 3.3216271583260168e-06,
      "loss": 0.0,
      "step": 6380
    },
    {
      "epoch": 1.8674275680421424,
      "grad_norm": 0.0006039079744368792,
      "learning_rate": 3.3143107989464447e-06,
      "loss": 0.0,
      "step": 6381
    },
    {
      "epoch": 1.8677202224173253,
      "grad_norm": 0.0003744322166312486,
      "learning_rate": 3.306994439566872e-06,
      "loss": 0.0,
      "step": 6382
    },
    {
      "epoch": 1.8680128767925082,
      "grad_norm": 0.0007173375342972577,
      "learning_rate": 3.299678080187299e-06,
      "loss": 0.0,
      "step": 6383
    },
    {
      "epoch": 1.868305531167691,
      "grad_norm": 0.00014455945347435772,
      "learning_rate": 3.292361720807726e-06,
      "loss": 0.0,
      "step": 6384
    },
    {
      "epoch": 1.868598185542874,
      "grad_norm": 0.0036249211989343166,
      "learning_rate": 3.285045361428153e-06,
      "loss": 0.0,
      "step": 6385
    },
    {
      "epoch": 1.8688908399180568,
      "grad_norm": 0.012913602404296398,
      "learning_rate": 3.277729002048581e-06,
      "loss": 0.0002,
      "step": 6386
    },
    {
      "epoch": 1.8691834942932397,
      "grad_norm": 0.00016423001943621784,
      "learning_rate": 3.270412642669008e-06,
      "loss": 0.0,
      "step": 6387
    },
    {
      "epoch": 1.8694761486684226,
      "grad_norm": 0.0016108466079458594,
      "learning_rate": 3.2630962832894353e-06,
      "loss": 0.0,
      "step": 6388
    },
    {
      "epoch": 1.8697688030436055,
      "grad_norm": 0.0030950899235904217,
      "learning_rate": 3.2557799239098624e-06,
      "loss": 0.0,
      "step": 6389
    },
    {
      "epoch": 1.8700614574187884,
      "grad_norm": 0.0007110501755960286,
      "learning_rate": 3.24846356453029e-06,
      "loss": 0.0,
      "step": 6390
    },
    {
      "epoch": 1.8703541117939713,
      "grad_norm": 0.0028555227909237146,
      "learning_rate": 3.241147205150717e-06,
      "loss": 0.0,
      "step": 6391
    },
    {
      "epoch": 1.8706467661691542,
      "grad_norm": 0.000410253502195701,
      "learning_rate": 3.233830845771144e-06,
      "loss": 0.0,
      "step": 6392
    },
    {
      "epoch": 1.870939420544337,
      "grad_norm": 0.0005777594051323831,
      "learning_rate": 3.2265144863915712e-06,
      "loss": 0.0,
      "step": 6393
    },
    {
      "epoch": 1.87123207491952,
      "grad_norm": 0.007163824513554573,
      "learning_rate": 3.219198127011999e-06,
      "loss": 0.0001,
      "step": 6394
    },
    {
      "epoch": 1.8715247292947028,
      "grad_norm": 0.0002955606614705175,
      "learning_rate": 3.2118817676324263e-06,
      "loss": 0.0,
      "step": 6395
    },
    {
      "epoch": 1.8718173836698857,
      "grad_norm": 0.0015786330914124846,
      "learning_rate": 3.2045654082528534e-06,
      "loss": 0.0,
      "step": 6396
    },
    {
      "epoch": 1.8721100380450688,
      "grad_norm": 0.0005388298304751515,
      "learning_rate": 3.1972490488732805e-06,
      "loss": 0.0,
      "step": 6397
    },
    {
      "epoch": 1.8724026924202517,
      "grad_norm": 0.0013606016291305423,
      "learning_rate": 3.1899326894937084e-06,
      "loss": 0.0,
      "step": 6398
    },
    {
      "epoch": 1.8726953467954346,
      "grad_norm": 4.769907474517822,
      "learning_rate": 3.1826163301141355e-06,
      "loss": 0.165,
      "step": 6399
    },
    {
      "epoch": 1.8729880011706175,
      "grad_norm": 0.0002351369766984135,
      "learning_rate": 3.1752999707345627e-06,
      "loss": 0.0,
      "step": 6400
    },
    {
      "epoch": 1.8732806555458004,
      "grad_norm": 0.0036255517043173313,
      "learning_rate": 3.1679836113549898e-06,
      "loss": 0.0,
      "step": 6401
    },
    {
      "epoch": 1.8735733099209835,
      "grad_norm": 0.013011470437049866,
      "learning_rate": 3.1606672519754173e-06,
      "loss": 0.0002,
      "step": 6402
    },
    {
      "epoch": 1.8738659642961664,
      "grad_norm": 0.024230200797319412,
      "learning_rate": 3.1533508925958444e-06,
      "loss": 0.0003,
      "step": 6403
    },
    {
      "epoch": 1.8741586186713493,
      "grad_norm": 0.0005433835322037339,
      "learning_rate": 3.1460345332162715e-06,
      "loss": 0.0,
      "step": 6404
    },
    {
      "epoch": 1.8744512730465321,
      "grad_norm": 0.019165195524692535,
      "learning_rate": 3.1387181738366986e-06,
      "loss": 0.0003,
      "step": 6405
    },
    {
      "epoch": 1.874743927421715,
      "grad_norm": 0.000899212434887886,
      "learning_rate": 3.1314018144571265e-06,
      "loss": 0.0,
      "step": 6406
    },
    {
      "epoch": 1.875036581796898,
      "grad_norm": 0.0005501322448253632,
      "learning_rate": 3.1240854550775536e-06,
      "loss": 0.0,
      "step": 6407
    },
    {
      "epoch": 1.8753292361720808,
      "grad_norm": 0.007282521575689316,
      "learning_rate": 3.1167690956979807e-06,
      "loss": 0.0001,
      "step": 6408
    },
    {
      "epoch": 1.8756218905472637,
      "grad_norm": 0.00667223334312439,
      "learning_rate": 3.1094527363184083e-06,
      "loss": 0.0001,
      "step": 6409
    },
    {
      "epoch": 1.8759145449224466,
      "grad_norm": 0.0007393379928544164,
      "learning_rate": 3.1021363769388354e-06,
      "loss": 0.0,
      "step": 6410
    },
    {
      "epoch": 1.8762071992976295,
      "grad_norm": 0.0019803973846137524,
      "learning_rate": 3.094820017559263e-06,
      "loss": 0.0,
      "step": 6411
    },
    {
      "epoch": 1.8764998536728124,
      "grad_norm": 0.0018379775574430823,
      "learning_rate": 3.08750365817969e-06,
      "loss": 0.0,
      "step": 6412
    },
    {
      "epoch": 1.8767925080479952,
      "grad_norm": 0.0005812466260977089,
      "learning_rate": 3.080187298800117e-06,
      "loss": 0.0,
      "step": 6413
    },
    {
      "epoch": 1.8770851624231781,
      "grad_norm": 0.0013607299188151956,
      "learning_rate": 3.0728709394205442e-06,
      "loss": 0.0,
      "step": 6414
    },
    {
      "epoch": 1.877377816798361,
      "grad_norm": 0.0012550321407616138,
      "learning_rate": 3.0655545800409717e-06,
      "loss": 0.0,
      "step": 6415
    },
    {
      "epoch": 1.877670471173544,
      "grad_norm": 0.0002778058697003871,
      "learning_rate": 3.058238220661399e-06,
      "loss": 0.0,
      "step": 6416
    },
    {
      "epoch": 1.8779631255487268,
      "grad_norm": 0.00119539606384933,
      "learning_rate": 3.0509218612818264e-06,
      "loss": 0.0,
      "step": 6417
    },
    {
      "epoch": 1.87825577992391,
      "grad_norm": 0.003676731837913394,
      "learning_rate": 3.0436055019022535e-06,
      "loss": 0.0,
      "step": 6418
    },
    {
      "epoch": 1.8785484342990928,
      "grad_norm": 0.00016842203331179917,
      "learning_rate": 3.036289142522681e-06,
      "loss": 0.0,
      "step": 6419
    },
    {
      "epoch": 1.8788410886742757,
      "grad_norm": 0.008006864227354527,
      "learning_rate": 3.028972783143108e-06,
      "loss": 0.0001,
      "step": 6420
    },
    {
      "epoch": 1.8791337430494586,
      "grad_norm": 0.001742623164318502,
      "learning_rate": 3.0216564237635356e-06,
      "loss": 0.0,
      "step": 6421
    },
    {
      "epoch": 1.8794263974246415,
      "grad_norm": 0.17839489877223969,
      "learning_rate": 3.0143400643839627e-06,
      "loss": 0.0004,
      "step": 6422
    },
    {
      "epoch": 1.8797190517998246,
      "grad_norm": 0.002378784818574786,
      "learning_rate": 3.0070237050043903e-06,
      "loss": 0.0,
      "step": 6423
    },
    {
      "epoch": 1.8800117061750075,
      "grad_norm": 0.00027628379757516086,
      "learning_rate": 2.9997073456248174e-06,
      "loss": 0.0,
      "step": 6424
    },
    {
      "epoch": 1.8803043605501903,
      "grad_norm": 0.000949666544329375,
      "learning_rate": 2.9923909862452445e-06,
      "loss": 0.0,
      "step": 6425
    },
    {
      "epoch": 1.8805970149253732,
      "grad_norm": 0.0007470758864656091,
      "learning_rate": 2.9850746268656716e-06,
      "loss": 0.0,
      "step": 6426
    },
    {
      "epoch": 1.8808896693005561,
      "grad_norm": 0.009189325384795666,
      "learning_rate": 2.977758267486099e-06,
      "loss": 0.0001,
      "step": 6427
    },
    {
      "epoch": 1.881182323675739,
      "grad_norm": 9.509653318673372e-05,
      "learning_rate": 2.970441908106526e-06,
      "loss": 0.0,
      "step": 6428
    },
    {
      "epoch": 1.881474978050922,
      "grad_norm": 0.003322751959785819,
      "learning_rate": 2.9631255487269537e-06,
      "loss": 0.0,
      "step": 6429
    },
    {
      "epoch": 1.8817676324261048,
      "grad_norm": 0.0018158421153202653,
      "learning_rate": 2.955809189347381e-06,
      "loss": 0.0,
      "step": 6430
    },
    {
      "epoch": 1.8820602868012877,
      "grad_norm": 0.0018216572934761643,
      "learning_rate": 2.9484928299678084e-06,
      "loss": 0.0,
      "step": 6431
    },
    {
      "epoch": 1.8823529411764706,
      "grad_norm": 0.003540392266586423,
      "learning_rate": 2.9411764705882355e-06,
      "loss": 0.0,
      "step": 6432
    },
    {
      "epoch": 1.8826455955516535,
      "grad_norm": 0.003678417531773448,
      "learning_rate": 2.933860111208663e-06,
      "loss": 0.0001,
      "step": 6433
    },
    {
      "epoch": 1.8829382499268363,
      "grad_norm": 0.0003505812492221594,
      "learning_rate": 2.92654375182909e-06,
      "loss": 0.0,
      "step": 6434
    },
    {
      "epoch": 1.8832309043020192,
      "grad_norm": 0.0007867827080190182,
      "learning_rate": 2.919227392449517e-06,
      "loss": 0.0,
      "step": 6435
    },
    {
      "epoch": 1.8835235586772021,
      "grad_norm": 0.0008522221469320357,
      "learning_rate": 2.9119110330699443e-06,
      "loss": 0.0,
      "step": 6436
    },
    {
      "epoch": 1.883816213052385,
      "grad_norm": 0.0005372047307901084,
      "learning_rate": 2.904594673690372e-06,
      "loss": 0.0,
      "step": 6437
    },
    {
      "epoch": 1.884108867427568,
      "grad_norm": 0.003420688910409808,
      "learning_rate": 2.897278314310799e-06,
      "loss": 0.0001,
      "step": 6438
    },
    {
      "epoch": 1.884401521802751,
      "grad_norm": 0.004475302062928677,
      "learning_rate": 2.8899619549312265e-06,
      "loss": 0.0001,
      "step": 6439
    },
    {
      "epoch": 1.8846941761779339,
      "grad_norm": 0.0016268702456727624,
      "learning_rate": 2.8826455955516536e-06,
      "loss": 0.0,
      "step": 6440
    },
    {
      "epoch": 1.8849868305531168,
      "grad_norm": 0.00023823804804123938,
      "learning_rate": 2.875329236172081e-06,
      "loss": 0.0,
      "step": 6441
    },
    {
      "epoch": 1.8852794849282997,
      "grad_norm": 0.0006432412192225456,
      "learning_rate": 2.868012876792508e-06,
      "loss": 0.0,
      "step": 6442
    },
    {
      "epoch": 1.8855721393034826,
      "grad_norm": 0.0009195173042826355,
      "learning_rate": 2.8606965174129357e-06,
      "loss": 0.0,
      "step": 6443
    },
    {
      "epoch": 1.8858647936786654,
      "grad_norm": 0.0014948223251849413,
      "learning_rate": 2.853380158033363e-06,
      "loss": 0.0,
      "step": 6444
    },
    {
      "epoch": 1.8861574480538486,
      "grad_norm": 0.001303711673244834,
      "learning_rate": 2.8460637986537903e-06,
      "loss": 0.0,
      "step": 6445
    },
    {
      "epoch": 1.8864501024290314,
      "grad_norm": 0.009742318652570248,
      "learning_rate": 2.8387474392742174e-06,
      "loss": 0.0001,
      "step": 6446
    },
    {
      "epoch": 1.8867427568042143,
      "grad_norm": 0.0009691920131444931,
      "learning_rate": 2.8314310798946445e-06,
      "loss": 0.0,
      "step": 6447
    },
    {
      "epoch": 1.8870354111793972,
      "grad_norm": 0.0013085316168144345,
      "learning_rate": 2.8241147205150717e-06,
      "loss": 0.0,
      "step": 6448
    },
    {
      "epoch": 1.88732806555458,
      "grad_norm": 0.0013028222601860762,
      "learning_rate": 2.8167983611354988e-06,
      "loss": 0.0,
      "step": 6449
    },
    {
      "epoch": 1.887620719929763,
      "grad_norm": 0.000381801015464589,
      "learning_rate": 2.8094820017559263e-06,
      "loss": 0.0,
      "step": 6450
    },
    {
      "epoch": 1.8879133743049459,
      "grad_norm": 0.003299749456346035,
      "learning_rate": 2.8021656423763534e-06,
      "loss": 0.0,
      "step": 6451
    },
    {
      "epoch": 1.8882060286801288,
      "grad_norm": 0.0018888454651460052,
      "learning_rate": 2.794849282996781e-06,
      "loss": 0.0,
      "step": 6452
    },
    {
      "epoch": 1.8884986830553117,
      "grad_norm": 0.0009297673823311925,
      "learning_rate": 2.787532923617208e-06,
      "loss": 0.0,
      "step": 6453
    },
    {
      "epoch": 1.8887913374304945,
      "grad_norm": 0.0006277947686612606,
      "learning_rate": 2.7802165642376355e-06,
      "loss": 0.0,
      "step": 6454
    },
    {
      "epoch": 1.8890839918056774,
      "grad_norm": 0.0011863817926496267,
      "learning_rate": 2.7729002048580626e-06,
      "loss": 0.0,
      "step": 6455
    },
    {
      "epoch": 1.8893766461808603,
      "grad_norm": 0.0010690029012039304,
      "learning_rate": 2.76558384547849e-06,
      "loss": 0.0,
      "step": 6456
    },
    {
      "epoch": 1.8896693005560432,
      "grad_norm": 0.008763744495809078,
      "learning_rate": 2.7582674860989173e-06,
      "loss": 0.0001,
      "step": 6457
    },
    {
      "epoch": 1.889961954931226,
      "grad_norm": 0.00045829196460545063,
      "learning_rate": 2.750951126719345e-06,
      "loss": 0.0,
      "step": 6458
    },
    {
      "epoch": 1.890254609306409,
      "grad_norm": 0.0005661585601046681,
      "learning_rate": 2.743634767339772e-06,
      "loss": 0.0,
      "step": 6459
    },
    {
      "epoch": 1.890547263681592,
      "grad_norm": 0.0005291930865496397,
      "learning_rate": 2.736318407960199e-06,
      "loss": 0.0,
      "step": 6460
    },
    {
      "epoch": 1.890839918056775,
      "grad_norm": 0.0003165259840898216,
      "learning_rate": 2.729002048580626e-06,
      "loss": 0.0,
      "step": 6461
    },
    {
      "epoch": 1.8911325724319579,
      "grad_norm": 0.0029839524067938328,
      "learning_rate": 2.7216856892010536e-06,
      "loss": 0.0,
      "step": 6462
    },
    {
      "epoch": 1.8914252268071408,
      "grad_norm": 0.0020246263593435287,
      "learning_rate": 2.7143693298214807e-06,
      "loss": 0.0,
      "step": 6463
    },
    {
      "epoch": 1.8917178811823236,
      "grad_norm": 0.015400498174130917,
      "learning_rate": 2.7070529704419083e-06,
      "loss": 0.0002,
      "step": 6464
    },
    {
      "epoch": 1.8920105355575065,
      "grad_norm": 0.0014981385320425034,
      "learning_rate": 2.6997366110623354e-06,
      "loss": 0.0,
      "step": 6465
    },
    {
      "epoch": 1.8923031899326896,
      "grad_norm": 0.002146840561181307,
      "learning_rate": 2.692420251682763e-06,
      "loss": 0.0,
      "step": 6466
    },
    {
      "epoch": 1.8925958443078725,
      "grad_norm": 0.0004235255182720721,
      "learning_rate": 2.68510389230319e-06,
      "loss": 0.0,
      "step": 6467
    },
    {
      "epoch": 1.8928884986830554,
      "grad_norm": 0.0003561250341590494,
      "learning_rate": 2.6777875329236175e-06,
      "loss": 0.0,
      "step": 6468
    },
    {
      "epoch": 1.8931811530582383,
      "grad_norm": 0.0002480603870935738,
      "learning_rate": 2.6704711735440446e-06,
      "loss": 0.0,
      "step": 6469
    },
    {
      "epoch": 1.8934738074334212,
      "grad_norm": 0.0002996289695147425,
      "learning_rate": 2.6631548141644717e-06,
      "loss": 0.0,
      "step": 6470
    },
    {
      "epoch": 1.893766461808604,
      "grad_norm": 0.0016276652459055185,
      "learning_rate": 2.6558384547848993e-06,
      "loss": 0.0,
      "step": 6471
    },
    {
      "epoch": 1.894059116183787,
      "grad_norm": 0.00033184816129505634,
      "learning_rate": 2.6485220954053264e-06,
      "loss": 0.0,
      "step": 6472
    },
    {
      "epoch": 1.8943517705589699,
      "grad_norm": 0.0006256209453567863,
      "learning_rate": 2.6412057360257535e-06,
      "loss": 0.0,
      "step": 6473
    },
    {
      "epoch": 1.8946444249341527,
      "grad_norm": 0.0066751097328960896,
      "learning_rate": 2.633889376646181e-06,
      "loss": 0.0001,
      "step": 6474
    },
    {
      "epoch": 1.8949370793093356,
      "grad_norm": 0.002083531813696027,
      "learning_rate": 2.626573017266608e-06,
      "loss": 0.0,
      "step": 6475
    },
    {
      "epoch": 1.8952297336845185,
      "grad_norm": 0.004366927780210972,
      "learning_rate": 2.6192566578870356e-06,
      "loss": 0.0001,
      "step": 6476
    },
    {
      "epoch": 1.8955223880597014,
      "grad_norm": 1.804510235786438,
      "learning_rate": 2.6119402985074627e-06,
      "loss": 0.0036,
      "step": 6477
    },
    {
      "epoch": 1.8958150424348843,
      "grad_norm": 0.0002599820145405829,
      "learning_rate": 2.6046239391278903e-06,
      "loss": 0.0,
      "step": 6478
    },
    {
      "epoch": 1.8961076968100672,
      "grad_norm": 0.0017510734032839537,
      "learning_rate": 2.5973075797483174e-06,
      "loss": 0.0,
      "step": 6479
    },
    {
      "epoch": 1.89640035118525,
      "grad_norm": 0.0003297017829027027,
      "learning_rate": 2.589991220368745e-06,
      "loss": 0.0,
      "step": 6480
    },
    {
      "epoch": 1.896693005560433,
      "grad_norm": 0.004637278616428375,
      "learning_rate": 2.582674860989172e-06,
      "loss": 0.0001,
      "step": 6481
    },
    {
      "epoch": 1.896985659935616,
      "grad_norm": 0.00019260791304986924,
      "learning_rate": 2.575358501609599e-06,
      "loss": 0.0,
      "step": 6482
    },
    {
      "epoch": 1.897278314310799,
      "grad_norm": 0.0009068685467354953,
      "learning_rate": 2.568042142230026e-06,
      "loss": 0.0,
      "step": 6483
    },
    {
      "epoch": 1.8975709686859819,
      "grad_norm": 0.00039480082341469824,
      "learning_rate": 2.5607257828504537e-06,
      "loss": 0.0,
      "step": 6484
    },
    {
      "epoch": 1.8978636230611647,
      "grad_norm": 0.00045794202014803886,
      "learning_rate": 2.553409423470881e-06,
      "loss": 0.0,
      "step": 6485
    },
    {
      "epoch": 1.8981562774363476,
      "grad_norm": 0.00048717408208176494,
      "learning_rate": 2.5460930640913084e-06,
      "loss": 0.0,
      "step": 6486
    },
    {
      "epoch": 1.8984489318115307,
      "grad_norm": 0.00021634685981553048,
      "learning_rate": 2.5387767047117355e-06,
      "loss": 0.0,
      "step": 6487
    },
    {
      "epoch": 1.8987415861867136,
      "grad_norm": 0.0002700884360820055,
      "learning_rate": 2.531460345332163e-06,
      "loss": 0.0,
      "step": 6488
    },
    {
      "epoch": 1.8990342405618965,
      "grad_norm": 0.004484069999307394,
      "learning_rate": 2.52414398595259e-06,
      "loss": 0.0001,
      "step": 6489
    },
    {
      "epoch": 1.8993268949370794,
      "grad_norm": 0.00015893818635959178,
      "learning_rate": 2.5168276265730176e-06,
      "loss": 0.0,
      "step": 6490
    },
    {
      "epoch": 1.8996195493122623,
      "grad_norm": 0.001963461982086301,
      "learning_rate": 2.5095112671934447e-06,
      "loss": 0.0,
      "step": 6491
    },
    {
      "epoch": 1.8999122036874452,
      "grad_norm": 0.0012448845664039254,
      "learning_rate": 2.5021949078138722e-06,
      "loss": 0.0,
      "step": 6492
    },
    {
      "epoch": 1.900204858062628,
      "grad_norm": 0.030520202592015266,
      "learning_rate": 2.4948785484342993e-06,
      "loss": 0.0001,
      "step": 6493
    },
    {
      "epoch": 1.900497512437811,
      "grad_norm": 0.00040364472079090774,
      "learning_rate": 2.4875621890547264e-06,
      "loss": 0.0,
      "step": 6494
    },
    {
      "epoch": 1.9007901668129938,
      "grad_norm": 0.002049447502940893,
      "learning_rate": 2.4802458296751536e-06,
      "loss": 0.0,
      "step": 6495
    },
    {
      "epoch": 1.9010828211881767,
      "grad_norm": 0.0020897185895591974,
      "learning_rate": 2.472929470295581e-06,
      "loss": 0.0,
      "step": 6496
    },
    {
      "epoch": 1.9013754755633596,
      "grad_norm": 0.00046888107317499816,
      "learning_rate": 2.465613110916008e-06,
      "loss": 0.0,
      "step": 6497
    },
    {
      "epoch": 1.9016681299385425,
      "grad_norm": 0.0004110477748326957,
      "learning_rate": 2.4582967515364357e-06,
      "loss": 0.0,
      "step": 6498
    },
    {
      "epoch": 1.9019607843137254,
      "grad_norm": 0.0018982577603310347,
      "learning_rate": 2.450980392156863e-06,
      "loss": 0.0,
      "step": 6499
    },
    {
      "epoch": 1.9022534386889083,
      "grad_norm": 0.0007911332650110126,
      "learning_rate": 2.4436640327772903e-06,
      "loss": 0.0,
      "step": 6500
    },
    {
      "epoch": 1.9025460930640912,
      "grad_norm": 0.00031156413024291396,
      "learning_rate": 2.4363476733977174e-06,
      "loss": 0.0,
      "step": 6501
    },
    {
      "epoch": 1.902838747439274,
      "grad_norm": 0.0020903449039906263,
      "learning_rate": 2.429031314018145e-06,
      "loss": 0.0,
      "step": 6502
    },
    {
      "epoch": 1.9031314018144572,
      "grad_norm": 0.0002454284986015409,
      "learning_rate": 2.421714954638572e-06,
      "loss": 0.0,
      "step": 6503
    },
    {
      "epoch": 1.90342405618964,
      "grad_norm": 0.0014006092678755522,
      "learning_rate": 2.4143985952589996e-06,
      "loss": 0.0,
      "step": 6504
    },
    {
      "epoch": 1.903716710564823,
      "grad_norm": 0.001830007415264845,
      "learning_rate": 2.4070822358794267e-06,
      "loss": 0.0,
      "step": 6505
    },
    {
      "epoch": 1.9040093649400058,
      "grad_norm": 0.00019994727335870266,
      "learning_rate": 2.399765876499854e-06,
      "loss": 0.0,
      "step": 6506
    },
    {
      "epoch": 1.9043020193151887,
      "grad_norm": 0.0018469392089173198,
      "learning_rate": 2.392449517120281e-06,
      "loss": 0.0,
      "step": 6507
    },
    {
      "epoch": 1.9045946736903718,
      "grad_norm": 0.0019941171631217003,
      "learning_rate": 2.3851331577407084e-06,
      "loss": 0.0,
      "step": 6508
    },
    {
      "epoch": 1.9048873280655547,
      "grad_norm": 0.00025979202473536134,
      "learning_rate": 2.3778167983611355e-06,
      "loss": 0.0,
      "step": 6509
    },
    {
      "epoch": 1.9051799824407376,
      "grad_norm": 0.0006788742612116039,
      "learning_rate": 2.370500438981563e-06,
      "loss": 0.0,
      "step": 6510
    },
    {
      "epoch": 1.9054726368159205,
      "grad_norm": 0.0010134984040632844,
      "learning_rate": 2.36318407960199e-06,
      "loss": 0.0,
      "step": 6511
    },
    {
      "epoch": 1.9057652911911034,
      "grad_norm": 0.00046694010961800814,
      "learning_rate": 2.3558677202224173e-06,
      "loss": 0.0,
      "step": 6512
    },
    {
      "epoch": 1.9060579455662863,
      "grad_norm": 0.004774912726134062,
      "learning_rate": 2.348551360842845e-06,
      "loss": 0.0,
      "step": 6513
    },
    {
      "epoch": 1.9063505999414692,
      "grad_norm": 0.0028041843324899673,
      "learning_rate": 2.341235001463272e-06,
      "loss": 0.0,
      "step": 6514
    },
    {
      "epoch": 1.906643254316652,
      "grad_norm": 0.003318385686725378,
      "learning_rate": 2.3339186420836994e-06,
      "loss": 0.0,
      "step": 6515
    },
    {
      "epoch": 1.906935908691835,
      "grad_norm": 0.00046362687135115266,
      "learning_rate": 2.3266022827041265e-06,
      "loss": 0.0,
      "step": 6516
    },
    {
      "epoch": 1.9072285630670178,
      "grad_norm": 0.0013521397486329079,
      "learning_rate": 2.3192859233245536e-06,
      "loss": 0.0,
      "step": 6517
    },
    {
      "epoch": 1.9075212174422007,
      "grad_norm": 0.00040896268910728395,
      "learning_rate": 2.3119695639449807e-06,
      "loss": 0.0,
      "step": 6518
    },
    {
      "epoch": 1.9078138718173836,
      "grad_norm": 0.0002475320943631232,
      "learning_rate": 2.3046532045654083e-06,
      "loss": 0.0,
      "step": 6519
    },
    {
      "epoch": 1.9081065261925665,
      "grad_norm": 0.004071303643286228,
      "learning_rate": 2.2973368451858354e-06,
      "loss": 0.0001,
      "step": 6520
    },
    {
      "epoch": 1.9083991805677494,
      "grad_norm": 0.00022741260181646794,
      "learning_rate": 2.290020485806263e-06,
      "loss": 0.0,
      "step": 6521
    },
    {
      "epoch": 1.9086918349429323,
      "grad_norm": 0.0004800940223503858,
      "learning_rate": 2.28270412642669e-06,
      "loss": 0.0,
      "step": 6522
    },
    {
      "epoch": 1.9089844893181152,
      "grad_norm": 0.0002673115814104676,
      "learning_rate": 2.2753877670471175e-06,
      "loss": 0.0,
      "step": 6523
    },
    {
      "epoch": 1.9092771436932983,
      "grad_norm": 0.0007175962091423571,
      "learning_rate": 2.2680714076675446e-06,
      "loss": 0.0,
      "step": 6524
    },
    {
      "epoch": 1.9095697980684812,
      "grad_norm": 0.00019210069149266928,
      "learning_rate": 2.260755048287972e-06,
      "loss": 0.0,
      "step": 6525
    },
    {
      "epoch": 1.909862452443664,
      "grad_norm": 0.0005344034289009869,
      "learning_rate": 2.2534386889083993e-06,
      "loss": 0.0,
      "step": 6526
    },
    {
      "epoch": 1.910155106818847,
      "grad_norm": 0.000837336468975991,
      "learning_rate": 2.2461223295288268e-06,
      "loss": 0.0,
      "step": 6527
    },
    {
      "epoch": 1.9104477611940298,
      "grad_norm": 0.00025415114942006767,
      "learning_rate": 2.238805970149254e-06,
      "loss": 0.0,
      "step": 6528
    },
    {
      "epoch": 1.910740415569213,
      "grad_norm": 0.00047166517470031977,
      "learning_rate": 2.231489610769681e-06,
      "loss": 0.0,
      "step": 6529
    },
    {
      "epoch": 1.9110330699443958,
      "grad_norm": 0.00017651126836426556,
      "learning_rate": 2.224173251390108e-06,
      "loss": 0.0,
      "step": 6530
    },
    {
      "epoch": 1.9113257243195787,
      "grad_norm": 0.00020520172256510705,
      "learning_rate": 2.2168568920105356e-06,
      "loss": 0.0,
      "step": 6531
    },
    {
      "epoch": 1.9116183786947616,
      "grad_norm": 0.00011977095709880814,
      "learning_rate": 2.2095405326309627e-06,
      "loss": 0.0,
      "step": 6532
    },
    {
      "epoch": 1.9119110330699445,
      "grad_norm": 0.00024520300212316215,
      "learning_rate": 2.2022241732513903e-06,
      "loss": 0.0,
      "step": 6533
    },
    {
      "epoch": 1.9122036874451274,
      "grad_norm": 0.00042339987703599036,
      "learning_rate": 2.1949078138718174e-06,
      "loss": 0.0,
      "step": 6534
    },
    {
      "epoch": 1.9124963418203103,
      "grad_norm": 0.0014089974574744701,
      "learning_rate": 2.187591454492245e-06,
      "loss": 0.0,
      "step": 6535
    },
    {
      "epoch": 1.9127889961954931,
      "grad_norm": 0.0005294946604408324,
      "learning_rate": 2.180275095112672e-06,
      "loss": 0.0,
      "step": 6536
    },
    {
      "epoch": 1.913081650570676,
      "grad_norm": 0.0004275265382602811,
      "learning_rate": 2.1729587357330995e-06,
      "loss": 0.0,
      "step": 6537
    },
    {
      "epoch": 1.913374304945859,
      "grad_norm": 0.002226543379947543,
      "learning_rate": 2.1656423763535266e-06,
      "loss": 0.0,
      "step": 6538
    },
    {
      "epoch": 1.9136669593210418,
      "grad_norm": 0.00032171490602195263,
      "learning_rate": 2.158326016973954e-06,
      "loss": 0.0,
      "step": 6539
    },
    {
      "epoch": 1.9139596136962247,
      "grad_norm": 0.0063609592616558075,
      "learning_rate": 2.1510096575943812e-06,
      "loss": 0.0001,
      "step": 6540
    },
    {
      "epoch": 1.9142522680714076,
      "grad_norm": 0.0011484756832942367,
      "learning_rate": 2.1436932982148083e-06,
      "loss": 0.0,
      "step": 6541
    },
    {
      "epoch": 1.9145449224465905,
      "grad_norm": 0.0010393839329481125,
      "learning_rate": 2.1363769388352355e-06,
      "loss": 0.0,
      "step": 6542
    },
    {
      "epoch": 1.9148375768217734,
      "grad_norm": 0.001812055124901235,
      "learning_rate": 2.129060579455663e-06,
      "loss": 0.0,
      "step": 6543
    },
    {
      "epoch": 1.9151302311969562,
      "grad_norm": 0.0005704189534299076,
      "learning_rate": 2.12174422007609e-06,
      "loss": 0.0,
      "step": 6544
    },
    {
      "epoch": 1.9154228855721394,
      "grad_norm": 0.0007563745020888746,
      "learning_rate": 2.1144278606965176e-06,
      "loss": 0.0,
      "step": 6545
    },
    {
      "epoch": 1.9157155399473222,
      "grad_norm": 0.0011137282708659768,
      "learning_rate": 2.1071115013169447e-06,
      "loss": 0.0,
      "step": 6546
    },
    {
      "epoch": 1.9160081943225051,
      "grad_norm": 0.00035710533848032355,
      "learning_rate": 2.0997951419373722e-06,
      "loss": 0.0,
      "step": 6547
    },
    {
      "epoch": 1.916300848697688,
      "grad_norm": 0.00036031228955835104,
      "learning_rate": 2.0924787825577993e-06,
      "loss": 0.0,
      "step": 6548
    },
    {
      "epoch": 1.916593503072871,
      "grad_norm": 0.00026160478591918945,
      "learning_rate": 2.085162423178227e-06,
      "loss": 0.0,
      "step": 6549
    },
    {
      "epoch": 1.9168861574480538,
      "grad_norm": 0.005153276491910219,
      "learning_rate": 2.077846063798654e-06,
      "loss": 0.0001,
      "step": 6550
    },
    {
      "epoch": 1.917178811823237,
      "grad_norm": 0.0007959470385685563,
      "learning_rate": 2.070529704419081e-06,
      "loss": 0.0,
      "step": 6551
    },
    {
      "epoch": 1.9174714661984198,
      "grad_norm": 0.0016937382752075791,
      "learning_rate": 2.063213345039508e-06,
      "loss": 0.0,
      "step": 6552
    },
    {
      "epoch": 1.9177641205736027,
      "grad_norm": 0.0024497276172041893,
      "learning_rate": 2.0558969856599357e-06,
      "loss": 0.0,
      "step": 6553
    },
    {
      "epoch": 1.9180567749487856,
      "grad_norm": 0.0009276737691834569,
      "learning_rate": 2.048580626280363e-06,
      "loss": 0.0,
      "step": 6554
    },
    {
      "epoch": 1.9183494293239685,
      "grad_norm": 0.0032618502154946327,
      "learning_rate": 2.0412642669007903e-06,
      "loss": 0.0,
      "step": 6555
    },
    {
      "epoch": 1.9186420836991513,
      "grad_norm": 0.0019429664826020598,
      "learning_rate": 2.0339479075212174e-06,
      "loss": 0.0,
      "step": 6556
    },
    {
      "epoch": 1.9189347380743342,
      "grad_norm": 0.0017567048780620098,
      "learning_rate": 2.026631548141645e-06,
      "loss": 0.0,
      "step": 6557
    },
    {
      "epoch": 1.9192273924495171,
      "grad_norm": 0.006915146019309759,
      "learning_rate": 2.019315188762072e-06,
      "loss": 0.0001,
      "step": 6558
    },
    {
      "epoch": 1.9195200468247,
      "grad_norm": 0.0016446765512228012,
      "learning_rate": 2.0119988293824996e-06,
      "loss": 0.0,
      "step": 6559
    },
    {
      "epoch": 1.919812701199883,
      "grad_norm": 0.0006018587737344205,
      "learning_rate": 2.0046824700029267e-06,
      "loss": 0.0,
      "step": 6560
    },
    {
      "epoch": 1.9201053555750658,
      "grad_norm": 0.0001805188221624121,
      "learning_rate": 1.9973661106233542e-06,
      "loss": 0.0,
      "step": 6561
    },
    {
      "epoch": 1.9203980099502487,
      "grad_norm": 0.00033795545459724963,
      "learning_rate": 1.9900497512437813e-06,
      "loss": 0.0,
      "step": 6562
    },
    {
      "epoch": 1.9206906643254316,
      "grad_norm": 0.0005303092184476554,
      "learning_rate": 1.9827333918642084e-06,
      "loss": 0.0,
      "step": 6563
    },
    {
      "epoch": 1.9209833187006145,
      "grad_norm": 0.0004896099562756717,
      "learning_rate": 1.9754170324846355e-06,
      "loss": 0.0,
      "step": 6564
    },
    {
      "epoch": 1.9212759730757973,
      "grad_norm": 0.002939594676718116,
      "learning_rate": 1.968100673105063e-06,
      "loss": 0.0,
      "step": 6565
    },
    {
      "epoch": 1.9215686274509802,
      "grad_norm": 0.0007181447581388056,
      "learning_rate": 1.96078431372549e-06,
      "loss": 0.0,
      "step": 6566
    },
    {
      "epoch": 1.9218612818261633,
      "grad_norm": 0.0001777529832907021,
      "learning_rate": 1.9534679543459177e-06,
      "loss": 0.0,
      "step": 6567
    },
    {
      "epoch": 1.9221539362013462,
      "grad_norm": 0.0002563889720477164,
      "learning_rate": 1.946151594966345e-06,
      "loss": 0.0,
      "step": 6568
    },
    {
      "epoch": 1.9224465905765291,
      "grad_norm": 0.004232883453369141,
      "learning_rate": 1.9388352355867723e-06,
      "loss": 0.0,
      "step": 6569
    },
    {
      "epoch": 1.922739244951712,
      "grad_norm": 0.001293240231461823,
      "learning_rate": 1.9315188762071994e-06,
      "loss": 0.0,
      "step": 6570
    },
    {
      "epoch": 1.923031899326895,
      "grad_norm": 0.00044473825255408883,
      "learning_rate": 1.924202516827627e-06,
      "loss": 0.0,
      "step": 6571
    },
    {
      "epoch": 1.923324553702078,
      "grad_norm": 0.002846464514732361,
      "learning_rate": 1.916886157448054e-06,
      "loss": 0.0,
      "step": 6572
    },
    {
      "epoch": 1.9236172080772609,
      "grad_norm": 0.0003088650992140174,
      "learning_rate": 1.9095697980684816e-06,
      "loss": 0.0,
      "step": 6573
    },
    {
      "epoch": 1.9239098624524438,
      "grad_norm": 0.0007258460973389447,
      "learning_rate": 1.9022534386889085e-06,
      "loss": 0.0,
      "step": 6574
    },
    {
      "epoch": 1.9242025168276267,
      "grad_norm": 0.001201051753014326,
      "learning_rate": 1.894937079309336e-06,
      "loss": 0.0,
      "step": 6575
    },
    {
      "epoch": 1.9244951712028096,
      "grad_norm": 0.00023510832397732884,
      "learning_rate": 1.887620719929763e-06,
      "loss": 0.0,
      "step": 6576
    },
    {
      "epoch": 1.9247878255779924,
      "grad_norm": 0.0005937555688433349,
      "learning_rate": 1.8803043605501902e-06,
      "loss": 0.0,
      "step": 6577
    },
    {
      "epoch": 1.9250804799531753,
      "grad_norm": 0.00188451015856117,
      "learning_rate": 1.8729880011706175e-06,
      "loss": 0.0,
      "step": 6578
    },
    {
      "epoch": 1.9253731343283582,
      "grad_norm": 0.0023857697378844023,
      "learning_rate": 1.8656716417910446e-06,
      "loss": 0.0,
      "step": 6579
    },
    {
      "epoch": 1.925665788703541,
      "grad_norm": 0.0008184052421711385,
      "learning_rate": 1.8583552824114722e-06,
      "loss": 0.0,
      "step": 6580
    },
    {
      "epoch": 1.925958443078724,
      "grad_norm": 0.001048028003424406,
      "learning_rate": 1.8510389230318993e-06,
      "loss": 0.0,
      "step": 6581
    },
    {
      "epoch": 1.9262510974539069,
      "grad_norm": 0.003968420904129744,
      "learning_rate": 1.8437225636523268e-06,
      "loss": 0.0001,
      "step": 6582
    },
    {
      "epoch": 1.9265437518290898,
      "grad_norm": 0.0005019573145546019,
      "learning_rate": 1.8364062042727539e-06,
      "loss": 0.0,
      "step": 6583
    },
    {
      "epoch": 1.9268364062042727,
      "grad_norm": 0.00038237779517658055,
      "learning_rate": 1.8290898448931812e-06,
      "loss": 0.0,
      "step": 6584
    },
    {
      "epoch": 1.9271290605794555,
      "grad_norm": 5.985738754272461,
      "learning_rate": 1.8217734855136083e-06,
      "loss": 0.0265,
      "step": 6585
    },
    {
      "epoch": 1.9274217149546384,
      "grad_norm": 0.000331999734044075,
      "learning_rate": 1.8144571261340358e-06,
      "loss": 0.0,
      "step": 6586
    },
    {
      "epoch": 1.9277143693298213,
      "grad_norm": 0.0014998256228864193,
      "learning_rate": 1.807140766754463e-06,
      "loss": 0.0,
      "step": 6587
    },
    {
      "epoch": 1.9280070237050044,
      "grad_norm": 0.00024889782071113586,
      "learning_rate": 1.7998244073748905e-06,
      "loss": 0.0,
      "step": 6588
    },
    {
      "epoch": 1.9282996780801873,
      "grad_norm": 0.01695532537996769,
      "learning_rate": 1.7925080479953176e-06,
      "loss": 0.0001,
      "step": 6589
    },
    {
      "epoch": 1.9285923324553702,
      "grad_norm": 0.03542960807681084,
      "learning_rate": 1.7851916886157449e-06,
      "loss": 0.0001,
      "step": 6590
    },
    {
      "epoch": 1.928884986830553,
      "grad_norm": 0.00017672083049546927,
      "learning_rate": 1.777875329236172e-06,
      "loss": 0.0,
      "step": 6591
    },
    {
      "epoch": 1.929177641205736,
      "grad_norm": 0.023258518427610397,
      "learning_rate": 1.7705589698565995e-06,
      "loss": 0.0002,
      "step": 6592
    },
    {
      "epoch": 1.929470295580919,
      "grad_norm": 0.00031844244222156703,
      "learning_rate": 1.7632426104770266e-06,
      "loss": 0.0,
      "step": 6593
    },
    {
      "epoch": 1.929762949956102,
      "grad_norm": 0.0007790586678311229,
      "learning_rate": 1.7559262510974541e-06,
      "loss": 0.0,
      "step": 6594
    },
    {
      "epoch": 1.9300556043312849,
      "grad_norm": 0.0001478357007727027,
      "learning_rate": 1.7486098917178812e-06,
      "loss": 0.0,
      "step": 6595
    },
    {
      "epoch": 1.9303482587064678,
      "grad_norm": 0.00033634313149377704,
      "learning_rate": 1.7412935323383086e-06,
      "loss": 0.0,
      "step": 6596
    },
    {
      "epoch": 1.9306409130816506,
      "grad_norm": 0.01702825166285038,
      "learning_rate": 1.7339771729587357e-06,
      "loss": 0.0003,
      "step": 6597
    },
    {
      "epoch": 1.9309335674568335,
      "grad_norm": 0.008284145966172218,
      "learning_rate": 1.7266608135791632e-06,
      "loss": 0.0001,
      "step": 6598
    },
    {
      "epoch": 1.9312262218320164,
      "grad_norm": 0.0009149824036285281,
      "learning_rate": 1.7193444541995903e-06,
      "loss": 0.0,
      "step": 6599
    },
    {
      "epoch": 1.9315188762071993,
      "grad_norm": 0.0010282084112986922,
      "learning_rate": 1.7120280948200176e-06,
      "loss": 0.0,
      "step": 6600
    },
    {
      "epoch": 1.9318115305823822,
      "grad_norm": 0.00043911809916608036,
      "learning_rate": 1.7047117354404447e-06,
      "loss": 0.0,
      "step": 6601
    },
    {
      "epoch": 1.932104184957565,
      "grad_norm": 0.0004219369147904217,
      "learning_rate": 1.6973953760608722e-06,
      "loss": 0.0,
      "step": 6602
    },
    {
      "epoch": 1.932396839332748,
      "grad_norm": 0.0002833109174389392,
      "learning_rate": 1.6900790166812993e-06,
      "loss": 0.0,
      "step": 6603
    },
    {
      "epoch": 1.9326894937079309,
      "grad_norm": 0.000787951226811856,
      "learning_rate": 1.6827626573017269e-06,
      "loss": 0.0,
      "step": 6604
    },
    {
      "epoch": 1.9329821480831137,
      "grad_norm": 0.0002686723310034722,
      "learning_rate": 1.675446297922154e-06,
      "loss": 0.0,
      "step": 6605
    },
    {
      "epoch": 1.9332748024582966,
      "grad_norm": 0.0009234495228156447,
      "learning_rate": 1.6681299385425813e-06,
      "loss": 0.0,
      "step": 6606
    },
    {
      "epoch": 1.9335674568334795,
      "grad_norm": 0.0074235666543245316,
      "learning_rate": 1.6608135791630084e-06,
      "loss": 0.0001,
      "step": 6607
    },
    {
      "epoch": 1.9338601112086624,
      "grad_norm": 0.0007379117305390537,
      "learning_rate": 1.653497219783436e-06,
      "loss": 0.0,
      "step": 6608
    },
    {
      "epoch": 1.9341527655838455,
      "grad_norm": 0.0002182548923883587,
      "learning_rate": 1.646180860403863e-06,
      "loss": 0.0,
      "step": 6609
    },
    {
      "epoch": 1.9344454199590284,
      "grad_norm": 0.00016724392480682582,
      "learning_rate": 1.6388645010242905e-06,
      "loss": 0.0,
      "step": 6610
    },
    {
      "epoch": 1.9347380743342113,
      "grad_norm": 0.0007237906102091074,
      "learning_rate": 1.6315481416447176e-06,
      "loss": 0.0,
      "step": 6611
    },
    {
      "epoch": 1.9350307287093942,
      "grad_norm": 0.0016593494219705462,
      "learning_rate": 1.624231782265145e-06,
      "loss": 0.0,
      "step": 6612
    },
    {
      "epoch": 1.935323383084577,
      "grad_norm": 0.0003152602876070887,
      "learning_rate": 1.616915422885572e-06,
      "loss": 0.0,
      "step": 6613
    },
    {
      "epoch": 1.9356160374597602,
      "grad_norm": 0.00036291356082074344,
      "learning_rate": 1.6095990635059996e-06,
      "loss": 0.0,
      "step": 6614
    },
    {
      "epoch": 1.935908691834943,
      "grad_norm": 0.00020301327458582819,
      "learning_rate": 1.6022827041264267e-06,
      "loss": 0.0,
      "step": 6615
    },
    {
      "epoch": 1.936201346210126,
      "grad_norm": 0.001788858906365931,
      "learning_rate": 1.5949663447468542e-06,
      "loss": 0.0,
      "step": 6616
    },
    {
      "epoch": 1.9364940005853089,
      "grad_norm": 0.0008270929683931172,
      "learning_rate": 1.5876499853672813e-06,
      "loss": 0.0,
      "step": 6617
    },
    {
      "epoch": 1.9367866549604917,
      "grad_norm": 0.00048240655451081693,
      "learning_rate": 1.5803336259877086e-06,
      "loss": 0.0,
      "step": 6618
    },
    {
      "epoch": 1.9370793093356746,
      "grad_norm": 0.0003302799887023866,
      "learning_rate": 1.5730172666081357e-06,
      "loss": 0.0,
      "step": 6619
    },
    {
      "epoch": 1.9373719637108575,
      "grad_norm": 0.001860711956396699,
      "learning_rate": 1.5657009072285633e-06,
      "loss": 0.0,
      "step": 6620
    },
    {
      "epoch": 1.9376646180860404,
      "grad_norm": 0.0005062110722064972,
      "learning_rate": 1.5583845478489904e-06,
      "loss": 0.0,
      "step": 6621
    },
    {
      "epoch": 1.9379572724612233,
      "grad_norm": 0.0015211127465590835,
      "learning_rate": 1.5510681884694177e-06,
      "loss": 0.0,
      "step": 6622
    },
    {
      "epoch": 1.9382499268364062,
      "grad_norm": 0.0007924987585283816,
      "learning_rate": 1.543751829089845e-06,
      "loss": 0.0,
      "step": 6623
    },
    {
      "epoch": 1.938542581211589,
      "grad_norm": 0.001091635087504983,
      "learning_rate": 1.5364354697102721e-06,
      "loss": 0.0,
      "step": 6624
    },
    {
      "epoch": 1.938835235586772,
      "grad_norm": 0.000785691081546247,
      "learning_rate": 1.5291191103306994e-06,
      "loss": 0.0,
      "step": 6625
    },
    {
      "epoch": 1.9391278899619548,
      "grad_norm": 0.00026529538445174694,
      "learning_rate": 1.5218027509511267e-06,
      "loss": 0.0,
      "step": 6626
    },
    {
      "epoch": 1.9394205443371377,
      "grad_norm": 0.000439536408521235,
      "learning_rate": 1.514486391571554e-06,
      "loss": 0.0,
      "step": 6627
    },
    {
      "epoch": 1.9397131987123206,
      "grad_norm": 0.0004655602388083935,
      "learning_rate": 1.5071700321919814e-06,
      "loss": 0.0,
      "step": 6628
    },
    {
      "epoch": 1.9400058530875035,
      "grad_norm": 0.0006081527099013329,
      "learning_rate": 1.4998536728124087e-06,
      "loss": 0.0,
      "step": 6629
    },
    {
      "epoch": 1.9402985074626866,
      "grad_norm": 0.000489642086904496,
      "learning_rate": 1.4925373134328358e-06,
      "loss": 0.0,
      "step": 6630
    },
    {
      "epoch": 1.9405911618378695,
      "grad_norm": 0.0007188718882389367,
      "learning_rate": 1.485220954053263e-06,
      "loss": 0.0,
      "step": 6631
    },
    {
      "epoch": 1.9408838162130524,
      "grad_norm": 0.0002691959962248802,
      "learning_rate": 1.4779045946736904e-06,
      "loss": 0.0,
      "step": 6632
    },
    {
      "epoch": 1.9411764705882353,
      "grad_norm": 0.0006269677542150021,
      "learning_rate": 1.4705882352941177e-06,
      "loss": 0.0,
      "step": 6633
    },
    {
      "epoch": 1.9414691249634182,
      "grad_norm": 0.0014246383216232061,
      "learning_rate": 1.463271875914545e-06,
      "loss": 0.0,
      "step": 6634
    },
    {
      "epoch": 1.941761779338601,
      "grad_norm": 0.006769080180674791,
      "learning_rate": 1.4559555165349721e-06,
      "loss": 0.0001,
      "step": 6635
    },
    {
      "epoch": 1.9420544337137842,
      "grad_norm": 0.00024840165860950947,
      "learning_rate": 1.4486391571553995e-06,
      "loss": 0.0,
      "step": 6636
    },
    {
      "epoch": 1.942347088088967,
      "grad_norm": 0.00021002003632020205,
      "learning_rate": 1.4413227977758268e-06,
      "loss": 0.0,
      "step": 6637
    },
    {
      "epoch": 1.94263974246415,
      "grad_norm": 0.00024155636492650956,
      "learning_rate": 1.434006438396254e-06,
      "loss": 0.0,
      "step": 6638
    },
    {
      "epoch": 1.9429323968393328,
      "grad_norm": 0.0002558117266744375,
      "learning_rate": 1.4266900790166814e-06,
      "loss": 0.0,
      "step": 6639
    },
    {
      "epoch": 1.9432250512145157,
      "grad_norm": 0.00030932450317777693,
      "learning_rate": 1.4193737196371087e-06,
      "loss": 0.0,
      "step": 6640
    },
    {
      "epoch": 1.9435177055896986,
      "grad_norm": 0.0008397055207751691,
      "learning_rate": 1.4120573602575358e-06,
      "loss": 0.0,
      "step": 6641
    },
    {
      "epoch": 1.9438103599648815,
      "grad_norm": 0.0004229200421832502,
      "learning_rate": 1.4047410008779631e-06,
      "loss": 0.0,
      "step": 6642
    },
    {
      "epoch": 1.9441030143400644,
      "grad_norm": 0.004902367480099201,
      "learning_rate": 1.3974246414983905e-06,
      "loss": 0.0001,
      "step": 6643
    },
    {
      "epoch": 1.9443956687152473,
      "grad_norm": 0.0009248115820810199,
      "learning_rate": 1.3901082821188178e-06,
      "loss": 0.0,
      "step": 6644
    },
    {
      "epoch": 1.9446883230904302,
      "grad_norm": 4.626027584075928,
      "learning_rate": 1.382791922739245e-06,
      "loss": 0.2806,
      "step": 6645
    },
    {
      "epoch": 1.944980977465613,
      "grad_norm": 0.0015937514835968614,
      "learning_rate": 1.3754755633596724e-06,
      "loss": 0.0,
      "step": 6646
    },
    {
      "epoch": 1.945273631840796,
      "grad_norm": 0.0029478934593498707,
      "learning_rate": 1.3681592039800995e-06,
      "loss": 0.0,
      "step": 6647
    },
    {
      "epoch": 1.9455662862159788,
      "grad_norm": 0.30252787470817566,
      "learning_rate": 1.3608428446005268e-06,
      "loss": 0.0008,
      "step": 6648
    },
    {
      "epoch": 1.9458589405911617,
      "grad_norm": 0.0003999358450528234,
      "learning_rate": 1.3535264852209541e-06,
      "loss": 0.0,
      "step": 6649
    },
    {
      "epoch": 1.9461515949663446,
      "grad_norm": 0.0003845870669465512,
      "learning_rate": 1.3462101258413814e-06,
      "loss": 0.0,
      "step": 6650
    },
    {
      "epoch": 1.9464442493415277,
      "grad_norm": 0.00020116023370064795,
      "learning_rate": 1.3388937664618088e-06,
      "loss": 0.0,
      "step": 6651
    },
    {
      "epoch": 1.9467369037167106,
      "grad_norm": 0.0011880359379574656,
      "learning_rate": 1.3315774070822359e-06,
      "loss": 0.0,
      "step": 6652
    },
    {
      "epoch": 1.9470295580918935,
      "grad_norm": 0.0002459873794578016,
      "learning_rate": 1.3242610477026632e-06,
      "loss": 0.0,
      "step": 6653
    },
    {
      "epoch": 1.9473222124670764,
      "grad_norm": 0.008838320150971413,
      "learning_rate": 1.3169446883230905e-06,
      "loss": 0.0001,
      "step": 6654
    },
    {
      "epoch": 1.9476148668422593,
      "grad_norm": 0.000532762031070888,
      "learning_rate": 1.3096283289435178e-06,
      "loss": 0.0,
      "step": 6655
    },
    {
      "epoch": 1.9479075212174422,
      "grad_norm": 0.003082372946664691,
      "learning_rate": 1.3023119695639451e-06,
      "loss": 0.0,
      "step": 6656
    },
    {
      "epoch": 1.9482001755926253,
      "grad_norm": 0.0002804934047162533,
      "learning_rate": 1.2949956101843724e-06,
      "loss": 0.0,
      "step": 6657
    },
    {
      "epoch": 1.9484928299678081,
      "grad_norm": 0.001282552140764892,
      "learning_rate": 1.2876792508047995e-06,
      "loss": 0.0,
      "step": 6658
    },
    {
      "epoch": 1.948785484342991,
      "grad_norm": 0.0002798543428070843,
      "learning_rate": 1.2803628914252269e-06,
      "loss": 0.0,
      "step": 6659
    },
    {
      "epoch": 1.949078138718174,
      "grad_norm": 0.0009536589495837688,
      "learning_rate": 1.2730465320456542e-06,
      "loss": 0.0,
      "step": 6660
    },
    {
      "epoch": 1.9493707930933568,
      "grad_norm": 0.0001576505455886945,
      "learning_rate": 1.2657301726660815e-06,
      "loss": 0.0,
      "step": 6661
    },
    {
      "epoch": 1.9496634474685397,
      "grad_norm": 0.0032433916348963976,
      "learning_rate": 1.2584138132865088e-06,
      "loss": 0.0001,
      "step": 6662
    },
    {
      "epoch": 1.9499561018437226,
      "grad_norm": 0.03013523668050766,
      "learning_rate": 1.2510974539069361e-06,
      "loss": 0.0002,
      "step": 6663
    },
    {
      "epoch": 1.9502487562189055,
      "grad_norm": 0.001954001607373357,
      "learning_rate": 1.2437810945273632e-06,
      "loss": 0.0,
      "step": 6664
    },
    {
      "epoch": 1.9505414105940884,
      "grad_norm": 0.00017482234397903085,
      "learning_rate": 1.2364647351477905e-06,
      "loss": 0.0,
      "step": 6665
    },
    {
      "epoch": 1.9508340649692713,
      "grad_norm": 0.00079634680878371,
      "learning_rate": 1.2291483757682179e-06,
      "loss": 0.0,
      "step": 6666
    },
    {
      "epoch": 1.9511267193444541,
      "grad_norm": 0.0003813347721006721,
      "learning_rate": 1.2218320163886452e-06,
      "loss": 0.0,
      "step": 6667
    },
    {
      "epoch": 1.951419373719637,
      "grad_norm": 0.0006567532545886934,
      "learning_rate": 1.2145156570090725e-06,
      "loss": 0.0,
      "step": 6668
    },
    {
      "epoch": 1.95171202809482,
      "grad_norm": 0.0008832120802253485,
      "learning_rate": 1.2071992976294998e-06,
      "loss": 0.0,
      "step": 6669
    },
    {
      "epoch": 1.9520046824700028,
      "grad_norm": 0.0001882561919046566,
      "learning_rate": 1.199882938249927e-06,
      "loss": 0.0,
      "step": 6670
    },
    {
      "epoch": 1.9522973368451857,
      "grad_norm": 0.0003287769795861095,
      "learning_rate": 1.1925665788703542e-06,
      "loss": 0.0,
      "step": 6671
    },
    {
      "epoch": 1.9525899912203686,
      "grad_norm": 0.0007390181999653578,
      "learning_rate": 1.1852502194907815e-06,
      "loss": 0.0,
      "step": 6672
    },
    {
      "epoch": 1.9528826455955517,
      "grad_norm": 0.0011151289800181985,
      "learning_rate": 1.1779338601112086e-06,
      "loss": 0.0,
      "step": 6673
    },
    {
      "epoch": 1.9531752999707346,
      "grad_norm": 0.0009459562716074288,
      "learning_rate": 1.170617500731636e-06,
      "loss": 0.0,
      "step": 6674
    },
    {
      "epoch": 1.9534679543459175,
      "grad_norm": 0.00026561840786598623,
      "learning_rate": 1.1633011413520633e-06,
      "loss": 0.0,
      "step": 6675
    },
    {
      "epoch": 1.9537606087211004,
      "grad_norm": 0.007528937421739101,
      "learning_rate": 1.1559847819724904e-06,
      "loss": 0.0001,
      "step": 6676
    },
    {
      "epoch": 1.9540532630962832,
      "grad_norm": 0.0004000811604782939,
      "learning_rate": 1.1486684225929177e-06,
      "loss": 0.0,
      "step": 6677
    },
    {
      "epoch": 1.9543459174714664,
      "grad_norm": 0.0017375907627865672,
      "learning_rate": 1.141352063213345e-06,
      "loss": 0.0,
      "step": 6678
    },
    {
      "epoch": 1.9546385718466492,
      "grad_norm": 0.005005417391657829,
      "learning_rate": 1.1340357038337723e-06,
      "loss": 0.0001,
      "step": 6679
    },
    {
      "epoch": 1.9549312262218321,
      "grad_norm": 0.00020254511036910117,
      "learning_rate": 1.1267193444541996e-06,
      "loss": 0.0,
      "step": 6680
    },
    {
      "epoch": 1.955223880597015,
      "grad_norm": 0.0009266235865652561,
      "learning_rate": 1.119402985074627e-06,
      "loss": 0.0,
      "step": 6681
    },
    {
      "epoch": 1.955516534972198,
      "grad_norm": 0.001342368428595364,
      "learning_rate": 1.112086625695054e-06,
      "loss": 0.0,
      "step": 6682
    },
    {
      "epoch": 1.9558091893473808,
      "grad_norm": 0.00018644121882971376,
      "learning_rate": 1.1047702663154814e-06,
      "loss": 0.0,
      "step": 6683
    },
    {
      "epoch": 1.9561018437225637,
      "grad_norm": 0.00033372986945323646,
      "learning_rate": 1.0974539069359087e-06,
      "loss": 0.0,
      "step": 6684
    },
    {
      "epoch": 1.9563944980977466,
      "grad_norm": 0.001495027681812644,
      "learning_rate": 1.090137547556336e-06,
      "loss": 0.0,
      "step": 6685
    },
    {
      "epoch": 1.9566871524729295,
      "grad_norm": 0.0002885348512791097,
      "learning_rate": 1.0828211881767633e-06,
      "loss": 0.0,
      "step": 6686
    },
    {
      "epoch": 1.9569798068481123,
      "grad_norm": 0.017791662365198135,
      "learning_rate": 1.0755048287971906e-06,
      "loss": 0.0001,
      "step": 6687
    },
    {
      "epoch": 1.9572724612232952,
      "grad_norm": 0.00032072325120680034,
      "learning_rate": 1.0681884694176177e-06,
      "loss": 0.0,
      "step": 6688
    },
    {
      "epoch": 1.9575651155984781,
      "grad_norm": 0.0002882403787225485,
      "learning_rate": 1.060872110038045e-06,
      "loss": 0.0,
      "step": 6689
    },
    {
      "epoch": 1.957857769973661,
      "grad_norm": 0.00029905178234912455,
      "learning_rate": 1.0535557506584724e-06,
      "loss": 0.0,
      "step": 6690
    },
    {
      "epoch": 1.958150424348844,
      "grad_norm": 18.095945358276367,
      "learning_rate": 1.0462393912788997e-06,
      "loss": 0.0775,
      "step": 6691
    },
    {
      "epoch": 1.9584430787240268,
      "grad_norm": 0.0052270409651100636,
      "learning_rate": 1.038923031899327e-06,
      "loss": 0.0001,
      "step": 6692
    },
    {
      "epoch": 1.9587357330992097,
      "grad_norm": 0.0042382171377539635,
      "learning_rate": 1.031606672519754e-06,
      "loss": 0.0001,
      "step": 6693
    },
    {
      "epoch": 1.9590283874743928,
      "grad_norm": 0.0034267029259353876,
      "learning_rate": 1.0242903131401814e-06,
      "loss": 0.0001,
      "step": 6694
    },
    {
      "epoch": 1.9593210418495757,
      "grad_norm": 0.0046326094307005405,
      "learning_rate": 1.0169739537606087e-06,
      "loss": 0.0001,
      "step": 6695
    },
    {
      "epoch": 1.9596136962247586,
      "grad_norm": 0.000998358242213726,
      "learning_rate": 1.009657594381036e-06,
      "loss": 0.0,
      "step": 6696
    },
    {
      "epoch": 1.9599063505999414,
      "grad_norm": 0.0005337099428288639,
      "learning_rate": 1.0023412350014633e-06,
      "loss": 0.0,
      "step": 6697
    },
    {
      "epoch": 1.9601990049751243,
      "grad_norm": 0.00036399663076736033,
      "learning_rate": 9.950248756218907e-07,
      "loss": 0.0,
      "step": 6698
    },
    {
      "epoch": 1.9604916593503074,
      "grad_norm": 0.0004057335900142789,
      "learning_rate": 9.877085162423178e-07,
      "loss": 0.0,
      "step": 6699
    },
    {
      "epoch": 1.9607843137254903,
      "grad_norm": 0.001365336705930531,
      "learning_rate": 9.80392156862745e-07,
      "loss": 0.0,
      "step": 6700
    },
    {
      "epoch": 1.9610769681006732,
      "grad_norm": 0.009753764607012272,
      "learning_rate": 9.730757974831724e-07,
      "loss": 0.0001,
      "step": 6701
    },
    {
      "epoch": 1.9613696224758561,
      "grad_norm": 0.0016042344504967332,
      "learning_rate": 9.657594381035997e-07,
      "loss": 0.0,
      "step": 6702
    },
    {
      "epoch": 1.961662276851039,
      "grad_norm": 0.00023163444711826742,
      "learning_rate": 9.58443078724027e-07,
      "loss": 0.0,
      "step": 6703
    },
    {
      "epoch": 1.9619549312262219,
      "grad_norm": 0.0005272638518363237,
      "learning_rate": 9.511267193444542e-07,
      "loss": 0.0,
      "step": 6704
    },
    {
      "epoch": 1.9622475856014048,
      "grad_norm": 0.002219425980001688,
      "learning_rate": 9.438103599648816e-07,
      "loss": 0.0,
      "step": 6705
    },
    {
      "epoch": 1.9625402399765877,
      "grad_norm": 0.0005287674139253795,
      "learning_rate": 9.364940005853088e-07,
      "loss": 0.0,
      "step": 6706
    },
    {
      "epoch": 1.9628328943517706,
      "grad_norm": 0.0002533525403123349,
      "learning_rate": 9.291776412057361e-07,
      "loss": 0.0,
      "step": 6707
    },
    {
      "epoch": 1.9631255487269534,
      "grad_norm": 0.001309822197072208,
      "learning_rate": 9.218612818261634e-07,
      "loss": 0.0,
      "step": 6708
    },
    {
      "epoch": 1.9634182031021363,
      "grad_norm": 0.0002741070347838104,
      "learning_rate": 9.145449224465906e-07,
      "loss": 0.0,
      "step": 6709
    },
    {
      "epoch": 1.9637108574773192,
      "grad_norm": 0.00017839217616710812,
      "learning_rate": 9.072285630670179e-07,
      "loss": 0.0,
      "step": 6710
    },
    {
      "epoch": 1.964003511852502,
      "grad_norm": 0.00026237950078211725,
      "learning_rate": 8.999122036874452e-07,
      "loss": 0.0,
      "step": 6711
    },
    {
      "epoch": 1.964296166227685,
      "grad_norm": 0.0003299491945654154,
      "learning_rate": 8.925958443078724e-07,
      "loss": 0.0,
      "step": 6712
    },
    {
      "epoch": 1.9645888206028679,
      "grad_norm": 0.000435072899563238,
      "learning_rate": 8.852794849282998e-07,
      "loss": 0.0,
      "step": 6713
    },
    {
      "epoch": 1.9648814749780508,
      "grad_norm": 0.0001929103018483147,
      "learning_rate": 8.779631255487271e-07,
      "loss": 0.0,
      "step": 6714
    },
    {
      "epoch": 1.9651741293532339,
      "grad_norm": 0.0003146830713376403,
      "learning_rate": 8.706467661691543e-07,
      "loss": 0.0,
      "step": 6715
    },
    {
      "epoch": 1.9654667837284168,
      "grad_norm": 0.00029584948788397014,
      "learning_rate": 8.633304067895816e-07,
      "loss": 0.0,
      "step": 6716
    },
    {
      "epoch": 1.9657594381035997,
      "grad_norm": 0.0007161550456658006,
      "learning_rate": 8.560140474100088e-07,
      "loss": 0.0,
      "step": 6717
    },
    {
      "epoch": 1.9660520924787825,
      "grad_norm": 0.0007010952103883028,
      "learning_rate": 8.486976880304361e-07,
      "loss": 0.0,
      "step": 6718
    },
    {
      "epoch": 1.9663447468539654,
      "grad_norm": 0.00037265176069922745,
      "learning_rate": 8.413813286508634e-07,
      "loss": 0.0,
      "step": 6719
    },
    {
      "epoch": 1.9666374012291485,
      "grad_norm": 0.002495676511898637,
      "learning_rate": 8.340649692712906e-07,
      "loss": 0.0,
      "step": 6720
    },
    {
      "epoch": 1.9669300556043314,
      "grad_norm": 0.0006068368093110621,
      "learning_rate": 8.26748609891718e-07,
      "loss": 0.0,
      "step": 6721
    },
    {
      "epoch": 1.9672227099795143,
      "grad_norm": 0.0013754927786067128,
      "learning_rate": 8.194322505121453e-07,
      "loss": 0.0,
      "step": 6722
    },
    {
      "epoch": 1.9675153643546972,
      "grad_norm": 0.00012472060916479677,
      "learning_rate": 8.121158911325725e-07,
      "loss": 0.0,
      "step": 6723
    },
    {
      "epoch": 1.96780801872988,
      "grad_norm": 0.0019130449509248137,
      "learning_rate": 8.047995317529998e-07,
      "loss": 0.0,
      "step": 6724
    },
    {
      "epoch": 1.968100673105063,
      "grad_norm": 0.0007002189522609115,
      "learning_rate": 7.974831723734271e-07,
      "loss": 0.0,
      "step": 6725
    },
    {
      "epoch": 1.9683933274802459,
      "grad_norm": 0.00025963722146116197,
      "learning_rate": 7.901668129938543e-07,
      "loss": 0.0,
      "step": 6726
    },
    {
      "epoch": 1.9686859818554288,
      "grad_norm": 0.00016337841225322336,
      "learning_rate": 7.828504536142816e-07,
      "loss": 0.0,
      "step": 6727
    },
    {
      "epoch": 1.9689786362306116,
      "grad_norm": 0.00019893913122359663,
      "learning_rate": 7.755340942347088e-07,
      "loss": 0.0,
      "step": 6728
    },
    {
      "epoch": 1.9692712906057945,
      "grad_norm": 0.001312097767367959,
      "learning_rate": 7.682177348551361e-07,
      "loss": 0.0,
      "step": 6729
    },
    {
      "epoch": 1.9695639449809774,
      "grad_norm": 0.003005587961524725,
      "learning_rate": 7.609013754755634e-07,
      "loss": 0.0,
      "step": 6730
    },
    {
      "epoch": 1.9698565993561603,
      "grad_norm": 0.0006958172889426351,
      "learning_rate": 7.535850160959907e-07,
      "loss": 0.0,
      "step": 6731
    },
    {
      "epoch": 1.9701492537313432,
      "grad_norm": 0.001711481250822544,
      "learning_rate": 7.462686567164179e-07,
      "loss": 0.0,
      "step": 6732
    },
    {
      "epoch": 1.970441908106526,
      "grad_norm": 0.0006918639992363751,
      "learning_rate": 7.389522973368452e-07,
      "loss": 0.0,
      "step": 6733
    },
    {
      "epoch": 1.970734562481709,
      "grad_norm": 0.00021657461184076965,
      "learning_rate": 7.316359379572725e-07,
      "loss": 0.0,
      "step": 6734
    },
    {
      "epoch": 1.9710272168568919,
      "grad_norm": 0.004396005999296904,
      "learning_rate": 7.243195785776997e-07,
      "loss": 0.0001,
      "step": 6735
    },
    {
      "epoch": 1.971319871232075,
      "grad_norm": 0.014619813300669193,
      "learning_rate": 7.17003219198127e-07,
      "loss": 0.0002,
      "step": 6736
    },
    {
      "epoch": 1.9716125256072579,
      "grad_norm": 0.005002989899367094,
      "learning_rate": 7.096868598185544e-07,
      "loss": 0.0001,
      "step": 6737
    },
    {
      "epoch": 1.9719051799824407,
      "grad_norm": 0.000817713444121182,
      "learning_rate": 7.023705004389816e-07,
      "loss": 0.0,
      "step": 6738
    },
    {
      "epoch": 1.9721978343576236,
      "grad_norm": 0.0016037857858464122,
      "learning_rate": 6.950541410594089e-07,
      "loss": 0.0,
      "step": 6739
    },
    {
      "epoch": 1.9724904887328065,
      "grad_norm": 0.0007369687082245946,
      "learning_rate": 6.877377816798362e-07,
      "loss": 0.0,
      "step": 6740
    },
    {
      "epoch": 1.9727831431079894,
      "grad_norm": 0.0002680577745195478,
      "learning_rate": 6.804214223002634e-07,
      "loss": 0.0,
      "step": 6741
    },
    {
      "epoch": 1.9730757974831725,
      "grad_norm": 0.00020591943757608533,
      "learning_rate": 6.731050629206907e-07,
      "loss": 0.0,
      "step": 6742
    },
    {
      "epoch": 1.9733684518583554,
      "grad_norm": 0.0026272779796272516,
      "learning_rate": 6.657887035411179e-07,
      "loss": 0.0,
      "step": 6743
    },
    {
      "epoch": 1.9736611062335383,
      "grad_norm": 0.0003438132116571069,
      "learning_rate": 6.584723441615452e-07,
      "loss": 0.0,
      "step": 6744
    },
    {
      "epoch": 1.9739537606087212,
      "grad_norm": 0.0003406984906177968,
      "learning_rate": 6.511559847819726e-07,
      "loss": 0.0,
      "step": 6745
    },
    {
      "epoch": 1.974246414983904,
      "grad_norm": 0.004853005520999432,
      "learning_rate": 6.438396254023998e-07,
      "loss": 0.0001,
      "step": 6746
    },
    {
      "epoch": 1.974539069359087,
      "grad_norm": 0.00017146035679616034,
      "learning_rate": 6.365232660228271e-07,
      "loss": 0.0,
      "step": 6747
    },
    {
      "epoch": 1.9748317237342699,
      "grad_norm": 0.0004404619103297591,
      "learning_rate": 6.292069066432544e-07,
      "loss": 0.0,
      "step": 6748
    },
    {
      "epoch": 1.9751243781094527,
      "grad_norm": 0.0004179801617283374,
      "learning_rate": 6.218905472636816e-07,
      "loss": 0.0,
      "step": 6749
    },
    {
      "epoch": 1.9754170324846356,
      "grad_norm": 0.00242697075009346,
      "learning_rate": 6.145741878841089e-07,
      "loss": 0.0,
      "step": 6750
    },
    {
      "epoch": 1.9757096868598185,
      "grad_norm": 0.00041327610961161554,
      "learning_rate": 6.072578285045362e-07,
      "loss": 0.0,
      "step": 6751
    },
    {
      "epoch": 1.9760023412350014,
      "grad_norm": 0.0005576847470365465,
      "learning_rate": 5.999414691249635e-07,
      "loss": 0.0,
      "step": 6752
    },
    {
      "epoch": 1.9762949956101843,
      "grad_norm": 0.0003334633365739137,
      "learning_rate": 5.926251097453908e-07,
      "loss": 0.0,
      "step": 6753
    },
    {
      "epoch": 1.9765876499853672,
      "grad_norm": 0.0013462539063766599,
      "learning_rate": 5.85308750365818e-07,
      "loss": 0.0,
      "step": 6754
    },
    {
      "epoch": 1.97688030436055,
      "grad_norm": 0.0004790348175447434,
      "learning_rate": 5.779923909862452e-07,
      "loss": 0.0,
      "step": 6755
    },
    {
      "epoch": 1.977172958735733,
      "grad_norm": 0.0011190996738150716,
      "learning_rate": 5.706760316066725e-07,
      "loss": 0.0,
      "step": 6756
    },
    {
      "epoch": 1.9774656131109158,
      "grad_norm": 0.0007272576913237572,
      "learning_rate": 5.633596722270998e-07,
      "loss": 0.0,
      "step": 6757
    },
    {
      "epoch": 1.977758267486099,
      "grad_norm": 0.0003873982059303671,
      "learning_rate": 5.56043312847527e-07,
      "loss": 0.0,
      "step": 6758
    },
    {
      "epoch": 1.9780509218612818,
      "grad_norm": 0.0011222549946978688,
      "learning_rate": 5.487269534679543e-07,
      "loss": 0.0,
      "step": 6759
    },
    {
      "epoch": 1.9783435762364647,
      "grad_norm": 0.003980504814535379,
      "learning_rate": 5.414105940883817e-07,
      "loss": 0.0001,
      "step": 6760
    },
    {
      "epoch": 1.9786362306116476,
      "grad_norm": 0.0041741845197975636,
      "learning_rate": 5.340942347088089e-07,
      "loss": 0.0001,
      "step": 6761
    },
    {
      "epoch": 1.9789288849868305,
      "grad_norm": 0.0011988022597506642,
      "learning_rate": 5.267778753292362e-07,
      "loss": 0.0,
      "step": 6762
    },
    {
      "epoch": 1.9792215393620136,
      "grad_norm": 0.0013455471489578485,
      "learning_rate": 5.194615159496635e-07,
      "loss": 0.0,
      "step": 6763
    },
    {
      "epoch": 1.9795141937371965,
      "grad_norm": 0.0007949606515467167,
      "learning_rate": 5.121451565700907e-07,
      "loss": 0.0,
      "step": 6764
    },
    {
      "epoch": 1.9798068481123794,
      "grad_norm": 0.0003572139248717576,
      "learning_rate": 5.04828797190518e-07,
      "loss": 0.0,
      "step": 6765
    },
    {
      "epoch": 1.9800995024875623,
      "grad_norm": 0.002746115205809474,
      "learning_rate": 4.975124378109453e-07,
      "loss": 0.0,
      "step": 6766
    },
    {
      "epoch": 1.9803921568627452,
      "grad_norm": 0.4750521183013916,
      "learning_rate": 4.901960784313725e-07,
      "loss": 0.0012,
      "step": 6767
    },
    {
      "epoch": 1.980684811237928,
      "grad_norm": 0.0010382290929555893,
      "learning_rate": 4.828797190517999e-07,
      "loss": 0.0,
      "step": 6768
    },
    {
      "epoch": 1.980977465613111,
      "grad_norm": 0.00034128170227631927,
      "learning_rate": 4.755633596722271e-07,
      "loss": 0.0,
      "step": 6769
    },
    {
      "epoch": 1.9812701199882938,
      "grad_norm": 0.0018365723080933094,
      "learning_rate": 4.682470002926544e-07,
      "loss": 0.0,
      "step": 6770
    },
    {
      "epoch": 1.9815627743634767,
      "grad_norm": 0.0009086875361390412,
      "learning_rate": 4.609306409130817e-07,
      "loss": 0.0,
      "step": 6771
    },
    {
      "epoch": 1.9818554287386596,
      "grad_norm": 0.00021227910474408418,
      "learning_rate": 4.5361428153350896e-07,
      "loss": 0.0,
      "step": 6772
    },
    {
      "epoch": 1.9821480831138425,
      "grad_norm": 0.003428248455747962,
      "learning_rate": 4.462979221539362e-07,
      "loss": 0.0,
      "step": 6773
    },
    {
      "epoch": 1.9824407374890254,
      "grad_norm": 0.0003043974284082651,
      "learning_rate": 4.3898156277436353e-07,
      "loss": 0.0,
      "step": 6774
    },
    {
      "epoch": 1.9827333918642083,
      "grad_norm": 0.0013239759718999267,
      "learning_rate": 4.316652033947908e-07,
      "loss": 0.0,
      "step": 6775
    },
    {
      "epoch": 1.9830260462393912,
      "grad_norm": 0.0003209889691788703,
      "learning_rate": 4.2434884401521806e-07,
      "loss": 0.0,
      "step": 6776
    },
    {
      "epoch": 1.983318700614574,
      "grad_norm": 0.002172942040488124,
      "learning_rate": 4.170324846356453e-07,
      "loss": 0.0,
      "step": 6777
    },
    {
      "epoch": 1.983611354989757,
      "grad_norm": 0.0012680566869676113,
      "learning_rate": 4.0971612525607264e-07,
      "loss": 0.0,
      "step": 6778
    },
    {
      "epoch": 1.98390400936494,
      "grad_norm": 0.0007207993185147643,
      "learning_rate": 4.023997658764999e-07,
      "loss": 0.0,
      "step": 6779
    },
    {
      "epoch": 1.984196663740123,
      "grad_norm": 0.0004271386715117842,
      "learning_rate": 3.9508340649692716e-07,
      "loss": 0.0,
      "step": 6780
    },
    {
      "epoch": 1.9844893181153058,
      "grad_norm": 0.0005705247749574482,
      "learning_rate": 3.877670471173544e-07,
      "loss": 0.0,
      "step": 6781
    },
    {
      "epoch": 1.9847819724904887,
      "grad_norm": 0.001100093242712319,
      "learning_rate": 3.804506877377817e-07,
      "loss": 0.0,
      "step": 6782
    },
    {
      "epoch": 1.9850746268656716,
      "grad_norm": 0.0004704415041487664,
      "learning_rate": 3.7313432835820895e-07,
      "loss": 0.0,
      "step": 6783
    },
    {
      "epoch": 1.9853672812408547,
      "grad_norm": 0.00019834673730656505,
      "learning_rate": 3.6581796897863626e-07,
      "loss": 0.0,
      "step": 6784
    },
    {
      "epoch": 1.9856599356160376,
      "grad_norm": 0.004286408890038729,
      "learning_rate": 3.585016095990635e-07,
      "loss": 0.0001,
      "step": 6785
    },
    {
      "epoch": 1.9859525899912205,
      "grad_norm": 0.00397590221837163,
      "learning_rate": 3.511852502194908e-07,
      "loss": 0.0,
      "step": 6786
    },
    {
      "epoch": 1.9862452443664034,
      "grad_norm": 0.0005816129269078374,
      "learning_rate": 3.438688908399181e-07,
      "loss": 0.0,
      "step": 6787
    },
    {
      "epoch": 1.9865378987415863,
      "grad_norm": 0.00028890985413454473,
      "learning_rate": 3.3655253146034536e-07,
      "loss": 0.0,
      "step": 6788
    },
    {
      "epoch": 1.9868305531167691,
      "grad_norm": 0.0007584394188597798,
      "learning_rate": 3.292361720807726e-07,
      "loss": 0.0,
      "step": 6789
    },
    {
      "epoch": 1.987123207491952,
      "grad_norm": 0.00016148912254720926,
      "learning_rate": 3.219198127011999e-07,
      "loss": 0.0,
      "step": 6790
    },
    {
      "epoch": 1.987415861867135,
      "grad_norm": 0.0012093628756701946,
      "learning_rate": 3.146034533216272e-07,
      "loss": 0.0,
      "step": 6791
    },
    {
      "epoch": 1.9877085162423178,
      "grad_norm": 0.000516733038239181,
      "learning_rate": 3.0728709394205446e-07,
      "loss": 0.0,
      "step": 6792
    },
    {
      "epoch": 1.9880011706175007,
      "grad_norm": 0.0004513988096732646,
      "learning_rate": 2.999707345624817e-07,
      "loss": 0.0,
      "step": 6793
    },
    {
      "epoch": 1.9882938249926836,
      "grad_norm": 0.00036689036642201245,
      "learning_rate": 2.92654375182909e-07,
      "loss": 0.0,
      "step": 6794
    },
    {
      "epoch": 1.9885864793678665,
      "grad_norm": 0.004359518177807331,
      "learning_rate": 2.8533801580333625e-07,
      "loss": 0.0001,
      "step": 6795
    },
    {
      "epoch": 1.9888791337430494,
      "grad_norm": 0.0005313651636242867,
      "learning_rate": 2.780216564237635e-07,
      "loss": 0.0,
      "step": 6796
    },
    {
      "epoch": 1.9891717881182323,
      "grad_norm": 0.002708370564505458,
      "learning_rate": 2.7070529704419083e-07,
      "loss": 0.0,
      "step": 6797
    },
    {
      "epoch": 1.9894644424934151,
      "grad_norm": 0.0008690576069056988,
      "learning_rate": 2.633889376646181e-07,
      "loss": 0.0,
      "step": 6798
    },
    {
      "epoch": 1.989757096868598,
      "grad_norm": 0.0009381992858834565,
      "learning_rate": 2.5607257828504535e-07,
      "loss": 0.0,
      "step": 6799
    },
    {
      "epoch": 1.9900497512437811,
      "grad_norm": 0.0011495305225253105,
      "learning_rate": 2.4875621890547267e-07,
      "loss": 0.0,
      "step": 6800
    },
    {
      "epoch": 1.990342405618964,
      "grad_norm": 0.0016372364480048418,
      "learning_rate": 2.4143985952589993e-07,
      "loss": 0.0,
      "step": 6801
    },
    {
      "epoch": 1.990635059994147,
      "grad_norm": 0.000637992168776691,
      "learning_rate": 2.341235001463272e-07,
      "loss": 0.0,
      "step": 6802
    },
    {
      "epoch": 1.9909277143693298,
      "grad_norm": 0.008570825681090355,
      "learning_rate": 2.2680714076675448e-07,
      "loss": 0.0001,
      "step": 6803
    },
    {
      "epoch": 1.9912203687445127,
      "grad_norm": 0.000408270483603701,
      "learning_rate": 2.1949078138718177e-07,
      "loss": 0.0,
      "step": 6804
    },
    {
      "epoch": 1.9915130231196958,
      "grad_norm": 0.004543962422758341,
      "learning_rate": 2.1217442200760903e-07,
      "loss": 0.0001,
      "step": 6805
    },
    {
      "epoch": 1.9918056774948787,
      "grad_norm": 0.007146683521568775,
      "learning_rate": 2.0485806262803632e-07,
      "loss": 0.0,
      "step": 6806
    },
    {
      "epoch": 1.9920983318700616,
      "grad_norm": 0.0007193053606897593,
      "learning_rate": 1.9754170324846358e-07,
      "loss": 0.0,
      "step": 6807
    },
    {
      "epoch": 1.9923909862452445,
      "grad_norm": 0.0036593666300177574,
      "learning_rate": 1.9022534386889084e-07,
      "loss": 0.0001,
      "step": 6808
    },
    {
      "epoch": 1.9926836406204274,
      "grad_norm": 0.00022914950386621058,
      "learning_rate": 1.8290898448931813e-07,
      "loss": 0.0,
      "step": 6809
    },
    {
      "epoch": 1.9929762949956102,
      "grad_norm": 0.06791463494300842,
      "learning_rate": 1.755926251097454e-07,
      "loss": 0.0004,
      "step": 6810
    },
    {
      "epoch": 1.9932689493707931,
      "grad_norm": 0.0003776339872274548,
      "learning_rate": 1.6827626573017268e-07,
      "loss": 0.0,
      "step": 6811
    },
    {
      "epoch": 1.993561603745976,
      "grad_norm": 0.0002613313845358789,
      "learning_rate": 1.6095990635059994e-07,
      "loss": 0.0,
      "step": 6812
    },
    {
      "epoch": 1.993854258121159,
      "grad_norm": 0.0006130607798695564,
      "learning_rate": 1.5364354697102723e-07,
      "loss": 0.0,
      "step": 6813
    },
    {
      "epoch": 1.9941469124963418,
      "grad_norm": 0.0002516958920750767,
      "learning_rate": 1.463271875914545e-07,
      "loss": 0.0,
      "step": 6814
    },
    {
      "epoch": 1.9944395668715247,
      "grad_norm": 0.007760388310998678,
      "learning_rate": 1.3901082821188176e-07,
      "loss": 0.0001,
      "step": 6815
    },
    {
      "epoch": 1.9947322212467076,
      "grad_norm": 0.00039384610136039555,
      "learning_rate": 1.3169446883230904e-07,
      "loss": 0.0,
      "step": 6816
    },
    {
      "epoch": 1.9950248756218905,
      "grad_norm": 0.0006702264072373509,
      "learning_rate": 1.2437810945273633e-07,
      "loss": 0.0,
      "step": 6817
    },
    {
      "epoch": 1.9953175299970733,
      "grad_norm": 0.000267669529421255,
      "learning_rate": 1.170617500731636e-07,
      "loss": 0.0,
      "step": 6818
    },
    {
      "epoch": 1.9956101843722562,
      "grad_norm": 0.0061849369667470455,
      "learning_rate": 1.0974539069359088e-07,
      "loss": 0.0001,
      "step": 6819
    },
    {
      "epoch": 1.9959028387474391,
      "grad_norm": 0.007272869814187288,
      "learning_rate": 1.0242903131401816e-07,
      "loss": 0.0,
      "step": 6820
    },
    {
      "epoch": 1.9961954931226222,
      "grad_norm": 0.003008312778547406,
      "learning_rate": 9.511267193444542e-08,
      "loss": 0.0,
      "step": 6821
    },
    {
      "epoch": 1.9964881474978051,
      "grad_norm": 0.000999949057586491,
      "learning_rate": 8.77963125548727e-08,
      "loss": 0.0,
      "step": 6822
    },
    {
      "epoch": 1.996780801872988,
      "grad_norm": 0.00047217021347023547,
      "learning_rate": 8.047995317529997e-08,
      "loss": 0.0,
      "step": 6823
    },
    {
      "epoch": 1.997073456248171,
      "grad_norm": 0.0005122284637764096,
      "learning_rate": 7.316359379572725e-08,
      "loss": 0.0,
      "step": 6824
    },
    {
      "epoch": 1.9973661106233538,
      "grad_norm": 0.06620123982429504,
      "learning_rate": 6.584723441615452e-08,
      "loss": 0.0003,
      "step": 6825
    },
    {
      "epoch": 1.9976587649985367,
      "grad_norm": 0.0002497498644515872,
      "learning_rate": 5.85308750365818e-08,
      "loss": 0.0,
      "step": 6826
    },
    {
      "epoch": 1.9979514193737198,
      "grad_norm": 0.0007749227224849164,
      "learning_rate": 5.121451565700908e-08,
      "loss": 0.0,
      "step": 6827
    },
    {
      "epoch": 1.9982440737489027,
      "grad_norm": 0.0003250261361245066,
      "learning_rate": 4.389815627743635e-08,
      "loss": 0.0,
      "step": 6828
    },
    {
      "epoch": 1.9985367281240856,
      "grad_norm": 0.000477856578072533,
      "learning_rate": 3.6581796897863623e-08,
      "loss": 0.0,
      "step": 6829
    },
    {
      "epoch": 1.9988293824992684,
      "grad_norm": 0.0005425257841125131,
      "learning_rate": 2.92654375182909e-08,
      "loss": 0.0,
      "step": 6830
    },
    {
      "epoch": 1.9991220368744513,
      "grad_norm": 0.0002844630216713995,
      "learning_rate": 2.1949078138718174e-08,
      "loss": 0.0,
      "step": 6831
    },
    {
      "epoch": 1.9994146912496342,
      "grad_norm": 0.0004807655932381749,
      "learning_rate": 1.463271875914545e-08,
      "loss": 0.0,
      "step": 6832
    },
    {
      "epoch": 1.9997073456248171,
      "grad_norm": 0.0002281743654748425,
      "learning_rate": 7.316359379572725e-09,
      "loss": 0.0,
      "step": 6833
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.235203596181236e-05,
      "learning_rate": 0.0,
      "loss": 0.0,
      "step": 6834
    }
  ],
  "logging_steps": 1,
  "max_steps": 6834,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.481922672034893e+19,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}