{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.44483985765124556,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00044483985765124553,
      "grad_norm": 0.5906099081039429,
      "learning_rate": 4e-05,
      "loss": 1.504,
      "step": 1
    },
    {
      "epoch": 0.0008896797153024911,
      "grad_norm": 0.8538464903831482,
      "learning_rate": 8e-05,
      "loss": 1.4735,
      "step": 2
    },
    {
      "epoch": 0.0013345195729537367,
      "grad_norm": 0.46255260705947876,
      "learning_rate": 0.00012,
      "loss": 1.3887,
      "step": 3
    },
    {
      "epoch": 0.0017793594306049821,
      "grad_norm": 0.40842387080192566,
      "learning_rate": 0.00016,
      "loss": 1.2993,
      "step": 4
    },
    {
      "epoch": 0.002224199288256228,
      "grad_norm": 0.41691651940345764,
      "learning_rate": 0.0002,
      "loss": 1.3105,
      "step": 5
    },
    {
      "epoch": 0.0026690391459074734,
      "grad_norm": 0.40152719616889954,
      "learning_rate": 0.00019979899497487438,
      "loss": 1.1942,
      "step": 6
    },
    {
      "epoch": 0.003113879003558719,
      "grad_norm": 0.39211127161979675,
      "learning_rate": 0.00019959798994974876,
      "loss": 1.1688,
      "step": 7
    },
    {
      "epoch": 0.0035587188612099642,
      "grad_norm": 0.43954452872276306,
      "learning_rate": 0.00019939698492462313,
      "loss": 1.1211,
      "step": 8
    },
    {
      "epoch": 0.00400355871886121,
      "grad_norm": 0.38755497336387634,
      "learning_rate": 0.0001991959798994975,
      "loss": 1.1039,
      "step": 9
    },
    {
      "epoch": 0.004448398576512456,
      "grad_norm": 0.3547092378139496,
      "learning_rate": 0.00019899497487437187,
      "loss": 1.126,
      "step": 10
    },
    {
      "epoch": 0.004893238434163701,
      "grad_norm": 0.37269020080566406,
      "learning_rate": 0.00019879396984924622,
      "loss": 1.1599,
      "step": 11
    },
    {
      "epoch": 0.005338078291814947,
      "grad_norm": 0.3519953191280365,
      "learning_rate": 0.00019859296482412062,
      "loss": 1.2022,
      "step": 12
    },
    {
      "epoch": 0.005782918149466192,
      "grad_norm": 0.3725995421409607,
      "learning_rate": 0.000198391959798995,
      "loss": 1.2543,
      "step": 13
    },
    {
      "epoch": 0.006227758007117438,
      "grad_norm": 0.32365697622299194,
      "learning_rate": 0.00019819095477386937,
      "loss": 1.145,
      "step": 14
    },
    {
      "epoch": 0.0066725978647686835,
      "grad_norm": 0.44411811232566833,
      "learning_rate": 0.0001979899497487437,
      "loss": 1.2582,
      "step": 15
    },
    {
      "epoch": 0.0071174377224199285,
      "grad_norm": 0.4044501781463623,
      "learning_rate": 0.0001977889447236181,
      "loss": 1.1424,
      "step": 16
    },
    {
      "epoch": 0.007562277580071174,
      "grad_norm": 0.3195618689060211,
      "learning_rate": 0.00019758793969849249,
      "loss": 1.1522,
      "step": 17
    },
    {
      "epoch": 0.00800711743772242,
      "grad_norm": 0.35177746415138245,
      "learning_rate": 0.00019738693467336683,
      "loss": 1.0172,
      "step": 18
    },
    {
      "epoch": 0.008451957295373666,
      "grad_norm": 0.37688982486724854,
      "learning_rate": 0.0001971859296482412,
      "loss": 1.1343,
      "step": 19
    },
    {
      "epoch": 0.008896797153024912,
      "grad_norm": 0.3498818278312683,
      "learning_rate": 0.0001969849246231156,
      "loss": 1.1138,
      "step": 20
    },
    {
      "epoch": 0.009341637010676156,
      "grad_norm": 0.4116188585758209,
      "learning_rate": 0.00019678391959798995,
      "loss": 1.0602,
      "step": 21
    },
    {
      "epoch": 0.009786476868327402,
      "grad_norm": 0.5222630500793457,
      "learning_rate": 0.00019658291457286432,
      "loss": 1.2328,
      "step": 22
    },
    {
      "epoch": 0.010231316725978648,
      "grad_norm": 0.5468437075614929,
      "learning_rate": 0.0001963819095477387,
      "loss": 1.1929,
      "step": 23
    },
    {
      "epoch": 0.010676156583629894,
      "grad_norm": 0.39658740162849426,
      "learning_rate": 0.0001961809045226131,
      "loss": 0.9775,
      "step": 24
    },
    {
      "epoch": 0.01112099644128114,
      "grad_norm": 0.3198809027671814,
      "learning_rate": 0.00019597989949748744,
      "loss": 1.042,
      "step": 25
    },
    {
      "epoch": 0.011565836298932384,
      "grad_norm": 0.49676308035850525,
      "learning_rate": 0.00019577889447236181,
      "loss": 1.0378,
      "step": 26
    },
    {
      "epoch": 0.01201067615658363,
      "grad_norm": 0.3704735338687897,
      "learning_rate": 0.0001955778894472362,
      "loss": 1.0303,
      "step": 27
    },
    {
      "epoch": 0.012455516014234875,
      "grad_norm": 0.33084699511528015,
      "learning_rate": 0.00019537688442211056,
      "loss": 1.0252,
      "step": 28
    },
    {
      "epoch": 0.012900355871886121,
      "grad_norm": 0.35819771885871887,
      "learning_rate": 0.00019517587939698493,
      "loss": 1.0752,
      "step": 29
    },
    {
      "epoch": 0.013345195729537367,
      "grad_norm": 0.36553651094436646,
      "learning_rate": 0.0001949748743718593,
      "loss": 1.0637,
      "step": 30
    },
    {
      "epoch": 0.013790035587188613,
      "grad_norm": 0.41030144691467285,
      "learning_rate": 0.00019477386934673368,
      "loss": 1.1858,
      "step": 31
    },
    {
      "epoch": 0.014234875444839857,
      "grad_norm": 0.3703097105026245,
      "learning_rate": 0.00019457286432160805,
      "loss": 1.0193,
      "step": 32
    },
    {
      "epoch": 0.014679715302491103,
      "grad_norm": 0.37004703283309937,
      "learning_rate": 0.00019437185929648243,
      "loss": 1.0405,
      "step": 33
    },
    {
      "epoch": 0.015124555160142349,
      "grad_norm": 0.33804813027381897,
      "learning_rate": 0.0001941708542713568,
      "loss": 0.9303,
      "step": 34
    },
    {
      "epoch": 0.015569395017793594,
      "grad_norm": 0.5308049917221069,
      "learning_rate": 0.00019396984924623117,
      "loss": 0.961,
      "step": 35
    },
    {
      "epoch": 0.01601423487544484,
      "grad_norm": 0.4099597632884979,
      "learning_rate": 0.00019376884422110552,
      "loss": 1.1084,
      "step": 36
    },
    {
      "epoch": 0.016459074733096084,
      "grad_norm": 0.4100741446018219,
      "learning_rate": 0.00019356783919597992,
      "loss": 1.063,
      "step": 37
    },
    {
      "epoch": 0.016903914590747332,
      "grad_norm": 0.3497965335845947,
      "learning_rate": 0.0001933668341708543,
      "loss": 1.0864,
      "step": 38
    },
    {
      "epoch": 0.017348754448398576,
      "grad_norm": 0.3670850694179535,
      "learning_rate": 0.00019316582914572864,
      "loss": 1.071,
      "step": 39
    },
    {
      "epoch": 0.017793594306049824,
      "grad_norm": 0.38884881138801575,
      "learning_rate": 0.000192964824120603,
      "loss": 1.0091,
      "step": 40
    },
    {
      "epoch": 0.018238434163701068,
      "grad_norm": 0.3449464738368988,
      "learning_rate": 0.0001927638190954774,
      "loss": 1.0753,
      "step": 41
    },
    {
      "epoch": 0.018683274021352312,
      "grad_norm": 0.35679587721824646,
      "learning_rate": 0.00019256281407035178,
      "loss": 1.0992,
      "step": 42
    },
    {
      "epoch": 0.01912811387900356,
      "grad_norm": 0.3696941137313843,
      "learning_rate": 0.00019236180904522613,
      "loss": 1.0413,
      "step": 43
    },
    {
      "epoch": 0.019572953736654804,
      "grad_norm": 0.37430745363235474,
      "learning_rate": 0.0001921608040201005,
      "loss": 1.0424,
      "step": 44
    },
    {
      "epoch": 0.02001779359430605,
      "grad_norm": 0.33420661091804504,
      "learning_rate": 0.0001919597989949749,
      "loss": 1.0339,
      "step": 45
    },
    {
      "epoch": 0.020462633451957295,
      "grad_norm": 0.35058847069740295,
      "learning_rate": 0.00019175879396984925,
      "loss": 1.0291,
      "step": 46
    },
    {
      "epoch": 0.02090747330960854,
      "grad_norm": 0.3396761119365692,
      "learning_rate": 0.00019155778894472362,
      "loss": 1.1198,
      "step": 47
    },
    {
      "epoch": 0.021352313167259787,
      "grad_norm": 0.3136732280254364,
      "learning_rate": 0.000191356783919598,
      "loss": 1.0288,
      "step": 48
    },
    {
      "epoch": 0.02179715302491103,
      "grad_norm": 0.5155500173568726,
      "learning_rate": 0.0001911557788944724,
      "loss": 1.0353,
      "step": 49
    },
    {
      "epoch": 0.02224199288256228,
      "grad_norm": 0.39861586689949036,
      "learning_rate": 0.00019095477386934674,
      "loss": 0.9865,
      "step": 50
    },
    {
      "epoch": 0.022686832740213523,
      "grad_norm": 0.36214709281921387,
      "learning_rate": 0.0001907537688442211,
      "loss": 1.0886,
      "step": 51
    },
    {
      "epoch": 0.023131672597864767,
      "grad_norm": 0.35173431038856506,
      "learning_rate": 0.00019055276381909548,
      "loss": 1.1213,
      "step": 52
    },
    {
      "epoch": 0.023576512455516015,
      "grad_norm": 0.34853485226631165,
      "learning_rate": 0.00019035175879396986,
      "loss": 1.0123,
      "step": 53
    },
    {
      "epoch": 0.02402135231316726,
      "grad_norm": 0.3735390305519104,
      "learning_rate": 0.00019015075376884423,
      "loss": 1.1078,
      "step": 54
    },
    {
      "epoch": 0.024466192170818506,
      "grad_norm": 0.3629351854324341,
      "learning_rate": 0.0001899497487437186,
      "loss": 1.0864,
      "step": 55
    },
    {
      "epoch": 0.02491103202846975,
      "grad_norm": 0.2966906428337097,
      "learning_rate": 0.00018974874371859298,
      "loss": 1.057,
      "step": 56
    },
    {
      "epoch": 0.025355871886120998,
      "grad_norm": 0.34373167157173157,
      "learning_rate": 0.00018954773869346732,
      "loss": 0.9951,
      "step": 57
    },
    {
      "epoch": 0.025800711743772242,
      "grad_norm": 0.454569548368454,
      "learning_rate": 0.00018934673366834172,
      "loss": 1.0618,
      "step": 58
    },
    {
      "epoch": 0.026245551601423486,
      "grad_norm": 0.358252614736557,
      "learning_rate": 0.0001891457286432161,
      "loss": 1.0242,
      "step": 59
    },
    {
      "epoch": 0.026690391459074734,
      "grad_norm": 0.33208489418029785,
      "learning_rate": 0.00018894472361809047,
      "loss": 1.0812,
      "step": 60
    },
    {
      "epoch": 0.027135231316725978,
      "grad_norm": 0.3399137556552887,
      "learning_rate": 0.00018874371859296481,
      "loss": 0.8889,
      "step": 61
    },
    {
      "epoch": 0.027580071174377226,
      "grad_norm": 0.41684690117836,
      "learning_rate": 0.00018854271356783921,
      "loss": 0.8872,
      "step": 62
    },
    {
      "epoch": 0.02802491103202847,
      "grad_norm": 0.5529223680496216,
      "learning_rate": 0.0001883417085427136,
      "loss": 0.95,
      "step": 63
    },
    {
      "epoch": 0.028469750889679714,
      "grad_norm": 0.35778260231018066,
      "learning_rate": 0.00018814070351758793,
      "loss": 1.0439,
      "step": 64
    },
    {
      "epoch": 0.02891459074733096,
      "grad_norm": 0.387169748544693,
      "learning_rate": 0.0001879396984924623,
      "loss": 1.1486,
      "step": 65
    },
    {
      "epoch": 0.029359430604982206,
      "grad_norm": 0.32700011134147644,
      "learning_rate": 0.0001877386934673367,
      "loss": 1.1394,
      "step": 66
    },
    {
      "epoch": 0.029804270462633453,
      "grad_norm": 0.45792707800865173,
      "learning_rate": 0.00018753768844221108,
      "loss": 1.1815,
      "step": 67
    },
    {
      "epoch": 0.030249110320284697,
      "grad_norm": 0.4012312591075897,
      "learning_rate": 0.00018733668341708543,
      "loss": 1.1024,
      "step": 68
    },
    {
      "epoch": 0.03069395017793594,
      "grad_norm": 0.3783579468727112,
      "learning_rate": 0.0001871356783919598,
      "loss": 0.9901,
      "step": 69
    },
    {
      "epoch": 0.03113879003558719,
      "grad_norm": 0.33383217453956604,
      "learning_rate": 0.0001869346733668342,
      "loss": 1.0832,
      "step": 70
    },
    {
      "epoch": 0.03158362989323844,
      "grad_norm": 0.41429656744003296,
      "learning_rate": 0.00018673366834170854,
      "loss": 1.0593,
      "step": 71
    },
    {
      "epoch": 0.03202846975088968,
      "grad_norm": 0.5010104179382324,
      "learning_rate": 0.00018653266331658292,
      "loss": 1.0602,
      "step": 72
    },
    {
      "epoch": 0.032473309608540925,
      "grad_norm": 0.33280321955680847,
      "learning_rate": 0.0001863316582914573,
      "loss": 1.0004,
      "step": 73
    },
    {
      "epoch": 0.03291814946619217,
      "grad_norm": 0.39433741569519043,
      "learning_rate": 0.0001861306532663317,
      "loss": 1.0071,
      "step": 74
    },
    {
      "epoch": 0.03336298932384341,
      "grad_norm": 0.3676820397377014,
      "learning_rate": 0.00018592964824120604,
      "loss": 0.8539,
      "step": 75
    },
    {
      "epoch": 0.033807829181494664,
      "grad_norm": 0.3701139986515045,
      "learning_rate": 0.0001857286432160804,
      "loss": 1.1197,
      "step": 76
    },
    {
      "epoch": 0.03425266903914591,
      "grad_norm": 0.3730039596557617,
      "learning_rate": 0.00018552763819095478,
      "loss": 1.0727,
      "step": 77
    },
    {
      "epoch": 0.03469750889679715,
      "grad_norm": 0.3400503098964691,
      "learning_rate": 0.00018532663316582915,
      "loss": 1.0385,
      "step": 78
    },
    {
      "epoch": 0.035142348754448396,
      "grad_norm": 0.36764055490493774,
      "learning_rate": 0.00018512562814070353,
      "loss": 1.0818,
      "step": 79
    },
    {
      "epoch": 0.03558718861209965,
      "grad_norm": 0.37534597516059875,
      "learning_rate": 0.0001849246231155779,
      "loss": 0.9744,
      "step": 80
    },
    {
      "epoch": 0.03603202846975089,
      "grad_norm": 0.33084404468536377,
      "learning_rate": 0.00018472361809045227,
      "loss": 0.9885,
      "step": 81
    },
    {
      "epoch": 0.036476868327402136,
      "grad_norm": 0.36626842617988586,
      "learning_rate": 0.00018452261306532662,
      "loss": 1.0442,
      "step": 82
    },
    {
      "epoch": 0.03692170818505338,
      "grad_norm": 0.5151258111000061,
      "learning_rate": 0.00018432160804020102,
      "loss": 1.0693,
      "step": 83
    },
    {
      "epoch": 0.037366548042704624,
      "grad_norm": 0.36287805438041687,
      "learning_rate": 0.0001841206030150754,
      "loss": 1.0257,
      "step": 84
    },
    {
      "epoch": 0.037811387900355875,
      "grad_norm": 0.35327619314193726,
      "learning_rate": 0.00018391959798994977,
      "loss": 0.9902,
      "step": 85
    },
    {
      "epoch": 0.03825622775800712,
      "grad_norm": 0.32582369446754456,
      "learning_rate": 0.0001837185929648241,
      "loss": 1.0425,
      "step": 86
    },
    {
      "epoch": 0.03870106761565836,
      "grad_norm": 0.38856419920921326,
      "learning_rate": 0.0001835175879396985,
      "loss": 0.9939,
      "step": 87
    },
    {
      "epoch": 0.03914590747330961,
      "grad_norm": 0.3371952772140503,
      "learning_rate": 0.00018331658291457288,
      "loss": 1.0968,
      "step": 88
    },
    {
      "epoch": 0.03959074733096085,
      "grad_norm": 0.36074796319007874,
      "learning_rate": 0.00018311557788944723,
      "loss": 1.0878,
      "step": 89
    },
    {
      "epoch": 0.0400355871886121,
      "grad_norm": 0.39816179871559143,
      "learning_rate": 0.0001829145728643216,
      "loss": 1.0504,
      "step": 90
    },
    {
      "epoch": 0.04048042704626335,
      "grad_norm": 0.4431018829345703,
      "learning_rate": 0.000182713567839196,
      "loss": 0.8954,
      "step": 91
    },
    {
      "epoch": 0.04092526690391459,
      "grad_norm": 0.3593955338001251,
      "learning_rate": 0.00018251256281407038,
      "loss": 1.0031,
      "step": 92
    },
    {
      "epoch": 0.041370106761565835,
      "grad_norm": 0.2924874722957611,
      "learning_rate": 0.00018231155778894472,
      "loss": 0.9301,
      "step": 93
    },
    {
      "epoch": 0.04181494661921708,
      "grad_norm": 0.29153579473495483,
      "learning_rate": 0.0001821105527638191,
      "loss": 0.9428,
      "step": 94
    },
    {
      "epoch": 0.04225978647686833,
      "grad_norm": 0.3542582392692566,
      "learning_rate": 0.0001819095477386935,
      "loss": 0.9941,
      "step": 95
    },
    {
      "epoch": 0.042704626334519574,
      "grad_norm": 0.3997493088245392,
      "learning_rate": 0.00018170854271356784,
      "loss": 1.0187,
      "step": 96
    },
    {
      "epoch": 0.04314946619217082,
      "grad_norm": 0.3728165626525879,
      "learning_rate": 0.00018150753768844221,
      "loss": 1.1312,
      "step": 97
    },
    {
      "epoch": 0.04359430604982206,
      "grad_norm": 0.35325804352760315,
      "learning_rate": 0.0001813065326633166,
      "loss": 0.9955,
      "step": 98
    },
    {
      "epoch": 0.04403914590747331,
      "grad_norm": 0.39074355363845825,
      "learning_rate": 0.00018110552763819096,
      "loss": 1.0561,
      "step": 99
    },
    {
      "epoch": 0.04448398576512456,
      "grad_norm": 0.346187949180603,
      "learning_rate": 0.00018090452261306533,
      "loss": 0.9568,
      "step": 100
    },
    {
      "epoch": 0.0449288256227758,
      "grad_norm": 0.314586877822876,
      "learning_rate": 0.0001807035175879397,
      "loss": 1.1153,
      "step": 101
    },
    {
      "epoch": 0.045373665480427046,
      "grad_norm": 0.3357396423816681,
      "learning_rate": 0.00018050251256281408,
      "loss": 1.0259,
      "step": 102
    },
    {
      "epoch": 0.04581850533807829,
      "grad_norm": 0.3750225007534027,
      "learning_rate": 0.00018030150753768845,
      "loss": 1.096,
      "step": 103
    },
    {
      "epoch": 0.046263345195729534,
      "grad_norm": 0.3401976227760315,
      "learning_rate": 0.00018010050251256282,
      "loss": 0.9911,
      "step": 104
    },
    {
      "epoch": 0.046708185053380785,
      "grad_norm": 0.3866574764251709,
      "learning_rate": 0.0001798994974874372,
      "loss": 0.9558,
      "step": 105
    },
    {
      "epoch": 0.04715302491103203,
      "grad_norm": 0.3394455313682556,
      "learning_rate": 0.00017969849246231157,
      "loss": 1.0463,
      "step": 106
    },
    {
      "epoch": 0.04759786476868327,
      "grad_norm": 0.36298561096191406,
      "learning_rate": 0.00017949748743718592,
      "loss": 1.0807,
      "step": 107
    },
    {
      "epoch": 0.04804270462633452,
      "grad_norm": 0.41382816433906555,
      "learning_rate": 0.00017929648241206032,
      "loss": 0.9859,
      "step": 108
    },
    {
      "epoch": 0.04848754448398576,
      "grad_norm": 0.2769756019115448,
      "learning_rate": 0.0001790954773869347,
      "loss": 1.0251,
      "step": 109
    },
    {
      "epoch": 0.04893238434163701,
      "grad_norm": 0.31425032019615173,
      "learning_rate": 0.00017889447236180906,
      "loss": 0.9642,
      "step": 110
    },
    {
      "epoch": 0.04937722419928826,
      "grad_norm": 0.3441152274608612,
      "learning_rate": 0.0001786934673366834,
      "loss": 1.0211,
      "step": 111
    },
    {
      "epoch": 0.0498220640569395,
      "grad_norm": 0.40452349185943604,
      "learning_rate": 0.0001784924623115578,
      "loss": 1.109,
      "step": 112
    },
    {
      "epoch": 0.050266903914590745,
      "grad_norm": 0.32000288367271423,
      "learning_rate": 0.00017829145728643218,
      "loss": 1.1237,
      "step": 113
    },
    {
      "epoch": 0.050711743772241996,
      "grad_norm": 0.31428951025009155,
      "learning_rate": 0.00017809045226130653,
      "loss": 1.0443,
      "step": 114
    },
    {
      "epoch": 0.05115658362989324,
      "grad_norm": 0.37118658423423767,
      "learning_rate": 0.0001778894472361809,
      "loss": 0.9842,
      "step": 115
    },
    {
      "epoch": 0.051601423487544484,
      "grad_norm": 0.3640018403530121,
      "learning_rate": 0.0001776884422110553,
      "loss": 0.942,
      "step": 116
    },
    {
      "epoch": 0.05204626334519573,
      "grad_norm": 0.3735273778438568,
      "learning_rate": 0.00017748743718592967,
      "loss": 1.0128,
      "step": 117
    },
    {
      "epoch": 0.05249110320284697,
      "grad_norm": 0.37252792716026306,
      "learning_rate": 0.00017728643216080402,
      "loss": 1.0308,
      "step": 118
    },
    {
      "epoch": 0.052935943060498224,
      "grad_norm": 0.3111330568790436,
      "learning_rate": 0.0001770854271356784,
      "loss": 1.0082,
      "step": 119
    },
    {
      "epoch": 0.05338078291814947,
      "grad_norm": 0.40468236804008484,
      "learning_rate": 0.0001768844221105528,
      "loss": 1.0273,
      "step": 120
    },
    {
      "epoch": 0.05382562277580071,
      "grad_norm": 0.3018098473548889,
      "learning_rate": 0.00017668341708542714,
      "loss": 0.9599,
      "step": 121
    },
    {
      "epoch": 0.054270462633451956,
      "grad_norm": 0.3560699224472046,
      "learning_rate": 0.0001764824120603015,
      "loss": 1.0508,
      "step": 122
    },
    {
      "epoch": 0.0547153024911032,
      "grad_norm": 0.3692304193973541,
      "learning_rate": 0.00017628140703517588,
      "loss": 0.9454,
      "step": 123
    },
    {
      "epoch": 0.05516014234875445,
      "grad_norm": 0.38090547919273376,
      "learning_rate": 0.00017608040201005026,
      "loss": 1.0469,
      "step": 124
    },
    {
      "epoch": 0.055604982206405695,
      "grad_norm": 0.37586429715156555,
      "learning_rate": 0.00017587939698492463,
      "loss": 1.0205,
      "step": 125
    },
    {
      "epoch": 0.05604982206405694,
      "grad_norm": 0.4673503041267395,
      "learning_rate": 0.000175678391959799,
      "loss": 1.1555,
      "step": 126
    },
    {
      "epoch": 0.056494661921708184,
      "grad_norm": 0.42561087012290955,
      "learning_rate": 0.00017547738693467338,
      "loss": 1.1458,
      "step": 127
    },
    {
      "epoch": 0.05693950177935943,
      "grad_norm": 0.3206027150154114,
      "learning_rate": 0.00017527638190954775,
      "loss": 1.0165,
      "step": 128
    },
    {
      "epoch": 0.05738434163701068,
      "grad_norm": 0.3541994094848633,
      "learning_rate": 0.00017507537688442212,
      "loss": 1.0377,
      "step": 129
    },
    {
      "epoch": 0.05782918149466192,
      "grad_norm": 0.4175245761871338,
      "learning_rate": 0.0001748743718592965,
      "loss": 1.0371,
      "step": 130
    },
    {
      "epoch": 0.05827402135231317,
      "grad_norm": 0.2889978587627411,
      "learning_rate": 0.00017467336683417087,
      "loss": 1.0413,
      "step": 131
    },
    {
      "epoch": 0.05871886120996441,
      "grad_norm": 0.36901023983955383,
      "learning_rate": 0.00017447236180904521,
      "loss": 1.042,
      "step": 132
    },
    {
      "epoch": 0.059163701067615655,
      "grad_norm": 0.3048287332057953,
      "learning_rate": 0.00017427135678391961,
      "loss": 1.0587,
      "step": 133
    },
    {
      "epoch": 0.059608540925266906,
      "grad_norm": 0.40319743752479553,
      "learning_rate": 0.000174070351758794,
      "loss": 1.0727,
      "step": 134
    },
    {
      "epoch": 0.06005338078291815,
      "grad_norm": 0.30953213572502136,
      "learning_rate": 0.00017386934673366836,
      "loss": 0.8989,
      "step": 135
    },
    {
      "epoch": 0.060498220640569395,
      "grad_norm": 0.3466584384441376,
      "learning_rate": 0.0001736683417085427,
      "loss": 1.0438,
      "step": 136
    },
    {
      "epoch": 0.06094306049822064,
      "grad_norm": 0.4506623148918152,
      "learning_rate": 0.0001734673366834171,
      "loss": 1.0601,
      "step": 137
    },
    {
      "epoch": 0.06138790035587188,
      "grad_norm": 0.4022829830646515,
      "learning_rate": 0.00017326633165829148,
      "loss": 1.0149,
      "step": 138
    },
    {
      "epoch": 0.061832740213523134,
      "grad_norm": 0.32555732131004333,
      "learning_rate": 0.00017306532663316582,
      "loss": 1.0512,
      "step": 139
    },
    {
      "epoch": 0.06227758007117438,
      "grad_norm": 0.33276137709617615,
      "learning_rate": 0.0001728643216080402,
      "loss": 0.9819,
      "step": 140
    },
    {
      "epoch": 0.06272241992882563,
      "grad_norm": 0.32384124398231506,
      "learning_rate": 0.0001726633165829146,
      "loss": 0.9455,
      "step": 141
    },
    {
      "epoch": 0.06316725978647687,
      "grad_norm": 0.3403018116950989,
      "learning_rate": 0.00017246231155778897,
      "loss": 0.9247,
      "step": 142
    },
    {
      "epoch": 0.06361209964412812,
      "grad_norm": 0.3674178421497345,
      "learning_rate": 0.00017226130653266332,
      "loss": 1.074,
      "step": 143
    },
    {
      "epoch": 0.06405693950177936,
      "grad_norm": 0.39759331941604614,
      "learning_rate": 0.0001720603015075377,
      "loss": 1.0983,
      "step": 144
    },
    {
      "epoch": 0.0645017793594306,
      "grad_norm": 0.3763502240180969,
      "learning_rate": 0.00017185929648241206,
      "loss": 0.9859,
      "step": 145
    },
    {
      "epoch": 0.06494661921708185,
      "grad_norm": 0.44169896841049194,
      "learning_rate": 0.00017165829145728644,
      "loss": 1.0924,
      "step": 146
    },
    {
      "epoch": 0.0653914590747331,
      "grad_norm": 0.3722588121891022,
      "learning_rate": 0.0001714572864321608,
      "loss": 0.9457,
      "step": 147
    },
    {
      "epoch": 0.06583629893238434,
      "grad_norm": 0.3731984496116638,
      "learning_rate": 0.00017125628140703518,
      "loss": 1.1355,
      "step": 148
    },
    {
      "epoch": 0.06628113879003558,
      "grad_norm": 0.29889795184135437,
      "learning_rate": 0.00017105527638190955,
      "loss": 0.9392,
      "step": 149
    },
    {
      "epoch": 0.06672597864768683,
      "grad_norm": 0.36229395866394043,
      "learning_rate": 0.00017085427135678393,
      "loss": 0.8904,
      "step": 150
    },
    {
      "epoch": 0.06717081850533808,
      "grad_norm": 0.454025536775589,
      "learning_rate": 0.0001706532663316583,
      "loss": 0.857,
      "step": 151
    },
    {
      "epoch": 0.06761565836298933,
      "grad_norm": 0.34767961502075195,
      "learning_rate": 0.00017045226130653267,
      "loss": 1.0121,
      "step": 152
    },
    {
      "epoch": 0.06806049822064057,
      "grad_norm": 0.3490912616252899,
      "learning_rate": 0.00017025125628140705,
      "loss": 0.8651,
      "step": 153
    },
    {
      "epoch": 0.06850533807829182,
      "grad_norm": 0.41010549664497375,
      "learning_rate": 0.00017005025125628142,
      "loss": 0.9816,
      "step": 154
    },
    {
      "epoch": 0.06895017793594306,
      "grad_norm": 0.348345011472702,
      "learning_rate": 0.0001698492462311558,
      "loss": 0.9971,
      "step": 155
    },
    {
      "epoch": 0.0693950177935943,
      "grad_norm": 0.3389308452606201,
      "learning_rate": 0.00016964824120603016,
      "loss": 0.9833,
      "step": 156
    },
    {
      "epoch": 0.06983985765124555,
      "grad_norm": 0.3532242178916931,
      "learning_rate": 0.0001694472361809045,
      "loss": 1.1019,
      "step": 157
    },
    {
      "epoch": 0.07028469750889679,
      "grad_norm": 0.3977639675140381,
      "learning_rate": 0.0001692462311557789,
      "loss": 1.1519,
      "step": 158
    },
    {
      "epoch": 0.07072953736654804,
      "grad_norm": 0.3684947192668915,
      "learning_rate": 0.00016904522613065328,
      "loss": 1.0164,
      "step": 159
    },
    {
      "epoch": 0.0711743772241993,
      "grad_norm": 0.3370361328125,
      "learning_rate": 0.00016884422110552766,
      "loss": 0.9961,
      "step": 160
    },
    {
      "epoch": 0.07161921708185054,
      "grad_norm": 0.3965550661087036,
      "learning_rate": 0.000168643216080402,
      "loss": 1.0542,
      "step": 161
    },
    {
      "epoch": 0.07206405693950178,
      "grad_norm": 0.39642298221588135,
      "learning_rate": 0.0001684422110552764,
      "loss": 1.0452,
      "step": 162
    },
    {
      "epoch": 0.07250889679715303,
      "grad_norm": 0.3771675229072571,
      "learning_rate": 0.00016824120603015078,
      "loss": 1.0284,
      "step": 163
    },
    {
      "epoch": 0.07295373665480427,
      "grad_norm": 0.3752223253250122,
      "learning_rate": 0.00016804020100502512,
      "loss": 0.985,
      "step": 164
    },
    {
      "epoch": 0.07339857651245552,
      "grad_norm": 0.3507731258869171,
      "learning_rate": 0.0001678391959798995,
      "loss": 0.9716,
      "step": 165
    },
    {
      "epoch": 0.07384341637010676,
      "grad_norm": 0.3909834027290344,
      "learning_rate": 0.0001676381909547739,
      "loss": 0.8909,
      "step": 166
    },
    {
      "epoch": 0.074288256227758,
      "grad_norm": 0.5423752665519714,
      "learning_rate": 0.00016743718592964827,
      "loss": 0.9922,
      "step": 167
    },
    {
      "epoch": 0.07473309608540925,
      "grad_norm": 0.39162302017211914,
      "learning_rate": 0.0001672361809045226,
      "loss": 1.0075,
      "step": 168
    },
    {
      "epoch": 0.07517793594306049,
      "grad_norm": 0.3779022693634033,
      "learning_rate": 0.00016703517587939699,
      "loss": 0.9451,
      "step": 169
    },
    {
      "epoch": 0.07562277580071175,
      "grad_norm": 0.3254469633102417,
      "learning_rate": 0.00016683417085427136,
      "loss": 0.8934,
      "step": 170
    },
    {
      "epoch": 0.076067615658363,
      "grad_norm": 0.3539400100708008,
      "learning_rate": 0.00016663316582914573,
      "loss": 0.9944,
      "step": 171
    },
    {
      "epoch": 0.07651245551601424,
      "grad_norm": 0.41099923849105835,
      "learning_rate": 0.0001664321608040201,
      "loss": 1.018,
      "step": 172
    },
    {
      "epoch": 0.07695729537366548,
      "grad_norm": 0.40103086829185486,
      "learning_rate": 0.00016623115577889448,
      "loss": 0.9354,
      "step": 173
    },
    {
      "epoch": 0.07740213523131673,
      "grad_norm": 0.3429498076438904,
      "learning_rate": 0.00016603015075376885,
      "loss": 0.9328,
      "step": 174
    },
    {
      "epoch": 0.07784697508896797,
      "grad_norm": 0.2685195505619049,
      "learning_rate": 0.00016582914572864322,
      "loss": 0.6091,
      "step": 175
    },
    {
      "epoch": 0.07829181494661921,
      "grad_norm": 0.2993873655796051,
      "learning_rate": 0.0001656281407035176,
      "loss": 1.0054,
      "step": 176
    },
    {
      "epoch": 0.07873665480427046,
      "grad_norm": 0.29764705896377563,
      "learning_rate": 0.00016542713567839197,
      "loss": 0.9484,
      "step": 177
    },
    {
      "epoch": 0.0791814946619217,
      "grad_norm": 0.33408114314079285,
      "learning_rate": 0.00016522613065326634,
      "loss": 1.0898,
      "step": 178
    },
    {
      "epoch": 0.07962633451957295,
      "grad_norm": 0.3865182399749756,
      "learning_rate": 0.00016502512562814072,
      "loss": 1.0371,
      "step": 179
    },
    {
      "epoch": 0.0800711743772242,
      "grad_norm": 0.34564244747161865,
      "learning_rate": 0.0001648241206030151,
      "loss": 0.9825,
      "step": 180
    },
    {
      "epoch": 0.08051601423487545,
      "grad_norm": 0.29182514548301697,
      "learning_rate": 0.00016462311557788946,
      "loss": 0.8901,
      "step": 181
    },
    {
      "epoch": 0.0809608540925267,
      "grad_norm": 0.35713285207748413,
      "learning_rate": 0.0001644221105527638,
      "loss": 1.0267,
      "step": 182
    },
    {
      "epoch": 0.08140569395017794,
      "grad_norm": 0.38320329785346985,
      "learning_rate": 0.0001642211055276382,
      "loss": 0.9875,
      "step": 183
    },
    {
      "epoch": 0.08185053380782918,
      "grad_norm": 0.4025184214115143,
      "learning_rate": 0.00016402010050251258,
      "loss": 1.1871,
      "step": 184
    },
    {
      "epoch": 0.08229537366548043,
      "grad_norm": 0.34486982226371765,
      "learning_rate": 0.00016381909547738695,
      "loss": 0.982,
      "step": 185
    },
    {
      "epoch": 0.08274021352313167,
      "grad_norm": 0.45098572969436646,
      "learning_rate": 0.0001636180904522613,
      "loss": 0.9052,
      "step": 186
    },
    {
      "epoch": 0.08318505338078291,
      "grad_norm": 0.4179227352142334,
      "learning_rate": 0.0001634170854271357,
      "loss": 0.9985,
      "step": 187
    },
    {
      "epoch": 0.08362989323843416,
      "grad_norm": 0.36076629161834717,
      "learning_rate": 0.00016321608040201007,
      "loss": 0.9035,
      "step": 188
    },
    {
      "epoch": 0.08407473309608542,
      "grad_norm": 0.4315630793571472,
      "learning_rate": 0.00016301507537688442,
      "loss": 0.9815,
      "step": 189
    },
    {
      "epoch": 0.08451957295373666,
      "grad_norm": 0.31583619117736816,
      "learning_rate": 0.0001628140703517588,
      "loss": 1.1461,
      "step": 190
    },
    {
      "epoch": 0.0849644128113879,
      "grad_norm": 0.3578287661075592,
      "learning_rate": 0.00016261306532663316,
      "loss": 0.9802,
      "step": 191
    },
    {
      "epoch": 0.08540925266903915,
      "grad_norm": 0.35003262758255005,
      "learning_rate": 0.00016241206030150756,
      "loss": 0.9406,
      "step": 192
    },
    {
      "epoch": 0.08585409252669039,
      "grad_norm": 0.33165210485458374,
      "learning_rate": 0.0001622110552763819,
      "loss": 1.0484,
      "step": 193
    },
    {
      "epoch": 0.08629893238434164,
      "grad_norm": 0.33711302280426025,
      "learning_rate": 0.00016201005025125628,
      "loss": 1.0255,
      "step": 194
    },
    {
      "epoch": 0.08674377224199288,
      "grad_norm": 0.3443413972854614,
      "learning_rate": 0.00016180904522613066,
      "loss": 0.9512,
      "step": 195
    },
    {
      "epoch": 0.08718861209964412,
      "grad_norm": 0.5403580069541931,
      "learning_rate": 0.00016160804020100503,
      "loss": 1.0158,
      "step": 196
    },
    {
      "epoch": 0.08763345195729537,
      "grad_norm": 0.40549829602241516,
      "learning_rate": 0.0001614070351758794,
      "loss": 1.0102,
      "step": 197
    },
    {
      "epoch": 0.08807829181494661,
      "grad_norm": 0.43879571557044983,
      "learning_rate": 0.00016120603015075378,
      "loss": 1.0015,
      "step": 198
    },
    {
      "epoch": 0.08852313167259787,
      "grad_norm": 0.3620396852493286,
      "learning_rate": 0.00016100502512562815,
      "loss": 1.0248,
      "step": 199
    },
    {
      "epoch": 0.08896797153024912,
      "grad_norm": 0.339257150888443,
      "learning_rate": 0.00016080402010050252,
      "loss": 0.9548,
      "step": 200
    },
    {
      "epoch": 0.08941281138790036,
      "grad_norm": 0.3820551037788391,
      "learning_rate": 0.0001606030150753769,
      "loss": 1.0026,
      "step": 201
    },
    {
      "epoch": 0.0898576512455516,
      "grad_norm": 0.4135708808898926,
      "learning_rate": 0.00016040201005025127,
      "loss": 1.0476,
      "step": 202
    },
    {
      "epoch": 0.09030249110320285,
      "grad_norm": 0.3772624731063843,
      "learning_rate": 0.00016020100502512564,
      "loss": 0.8981,
      "step": 203
    },
    {
      "epoch": 0.09074733096085409,
      "grad_norm": 0.42693498730659485,
      "learning_rate": 0.00016,
      "loss": 1.3064,
      "step": 204
    },
    {
      "epoch": 0.09119217081850534,
      "grad_norm": 0.2962593138217926,
      "learning_rate": 0.00015979899497487439,
      "loss": 1.0059,
      "step": 205
    },
    {
      "epoch": 0.09163701067615658,
      "grad_norm": 0.3942723274230957,
      "learning_rate": 0.00015959798994974876,
      "loss": 0.9041,
      "step": 206
    },
    {
      "epoch": 0.09208185053380782,
      "grad_norm": 0.34500858187675476,
      "learning_rate": 0.0001593969849246231,
      "loss": 0.9624,
      "step": 207
    },
    {
      "epoch": 0.09252669039145907,
      "grad_norm": 0.41078099608421326,
      "learning_rate": 0.0001591959798994975,
      "loss": 1.0105,
      "step": 208
    },
    {
      "epoch": 0.09297153024911033,
      "grad_norm": 0.3484095335006714,
      "learning_rate": 0.00015899497487437188,
      "loss": 1.0591,
      "step": 209
    },
    {
      "epoch": 0.09341637010676157,
      "grad_norm": 0.3550896644592285,
      "learning_rate": 0.00015879396984924625,
      "loss": 1.0196,
      "step": 210
    },
    {
      "epoch": 0.09386120996441281,
      "grad_norm": 0.3832697570323944,
      "learning_rate": 0.0001585929648241206,
      "loss": 1.048,
      "step": 211
    },
    {
      "epoch": 0.09430604982206406,
      "grad_norm": 0.34110692143440247,
      "learning_rate": 0.000158391959798995,
      "loss": 1.0706,
      "step": 212
    },
    {
      "epoch": 0.0947508896797153,
      "grad_norm": 0.3769712746143341,
      "learning_rate": 0.00015819095477386937,
      "loss": 0.9514,
      "step": 213
    },
    {
      "epoch": 0.09519572953736655,
      "grad_norm": 0.3767399489879608,
      "learning_rate": 0.00015798994974874372,
      "loss": 0.9611,
      "step": 214
    },
    {
      "epoch": 0.09564056939501779,
      "grad_norm": 0.3955567479133606,
      "learning_rate": 0.0001577889447236181,
      "loss": 0.9683,
      "step": 215
    },
    {
      "epoch": 0.09608540925266904,
      "grad_norm": 0.4317300021648407,
      "learning_rate": 0.00015758793969849246,
      "loss": 0.957,
      "step": 216
    },
    {
      "epoch": 0.09653024911032028,
      "grad_norm": 0.4412591755390167,
      "learning_rate": 0.00015738693467336686,
      "loss": 1.0473,
      "step": 217
    },
    {
      "epoch": 0.09697508896797152,
      "grad_norm": 0.37174472212791443,
      "learning_rate": 0.0001571859296482412,
      "loss": 0.9258,
      "step": 218
    },
    {
      "epoch": 0.09741992882562278,
      "grad_norm": 0.4425841271877289,
      "learning_rate": 0.00015698492462311558,
      "loss": 1.0147,
      "step": 219
    },
    {
      "epoch": 0.09786476868327403,
      "grad_norm": 0.40715131163597107,
      "learning_rate": 0.00015678391959798995,
      "loss": 1.0028,
      "step": 220
    },
    {
      "epoch": 0.09830960854092527,
      "grad_norm": 0.3638611435890198,
      "learning_rate": 0.00015658291457286433,
      "loss": 1.1178,
      "step": 221
    },
    {
      "epoch": 0.09875444839857651,
      "grad_norm": 0.3498910963535309,
      "learning_rate": 0.0001563819095477387,
      "loss": 1.0467,
      "step": 222
    },
    {
      "epoch": 0.09919928825622776,
      "grad_norm": 0.36899253726005554,
      "learning_rate": 0.00015618090452261307,
      "loss": 1.0702,
      "step": 223
    },
    {
      "epoch": 0.099644128113879,
      "grad_norm": 0.398636132478714,
      "learning_rate": 0.00015597989949748745,
      "loss": 0.9641,
      "step": 224
    },
    {
      "epoch": 0.10008896797153025,
      "grad_norm": 0.3399724066257477,
      "learning_rate": 0.00015577889447236182,
      "loss": 0.9786,
      "step": 225
    },
    {
      "epoch": 0.10053380782918149,
      "grad_norm": 0.3582400679588318,
      "learning_rate": 0.0001555778894472362,
      "loss": 0.9441,
      "step": 226
    },
    {
      "epoch": 0.10097864768683273,
      "grad_norm": 0.365993469953537,
      "learning_rate": 0.00015537688442211056,
      "loss": 0.9669,
      "step": 227
    },
    {
      "epoch": 0.10142348754448399,
      "grad_norm": 0.46609994769096375,
      "learning_rate": 0.00015517587939698494,
      "loss": 1.0387,
      "step": 228
    },
    {
      "epoch": 0.10186832740213524,
      "grad_norm": 0.4048779010772705,
      "learning_rate": 0.0001549748743718593,
      "loss": 0.9808,
      "step": 229
    },
    {
      "epoch": 0.10231316725978648,
      "grad_norm": 0.3965264856815338,
      "learning_rate": 0.00015477386934673368,
      "loss": 1.0191,
      "step": 230
    },
    {
      "epoch": 0.10275800711743772,
      "grad_norm": 0.379210501909256,
      "learning_rate": 0.00015457286432160806,
      "loss": 0.9608,
      "step": 231
    },
    {
      "epoch": 0.10320284697508897,
      "grad_norm": 0.4589402675628662,
      "learning_rate": 0.0001543718592964824,
      "loss": 0.9267,
      "step": 232
    },
    {
      "epoch": 0.10364768683274021,
      "grad_norm": 0.3889247179031372,
      "learning_rate": 0.0001541708542713568,
      "loss": 1.0115,
      "step": 233
    },
    {
      "epoch": 0.10409252669039146,
      "grad_norm": 0.3973037004470825,
      "learning_rate": 0.00015396984924623117,
      "loss": 0.9967,
      "step": 234
    },
    {
      "epoch": 0.1045373665480427,
      "grad_norm": 0.35045430064201355,
      "learning_rate": 0.00015376884422110555,
      "loss": 0.9899,
      "step": 235
    },
    {
      "epoch": 0.10498220640569395,
      "grad_norm": 0.36741137504577637,
      "learning_rate": 0.0001535678391959799,
      "loss": 1.0016,
      "step": 236
    },
    {
      "epoch": 0.10542704626334519,
      "grad_norm": 0.4001176953315735,
      "learning_rate": 0.00015336683417085427,
      "loss": 1.0941,
      "step": 237
    },
    {
      "epoch": 0.10587188612099645,
      "grad_norm": 0.3526718020439148,
      "learning_rate": 0.00015316582914572867,
      "loss": 0.9905,
      "step": 238
    },
    {
      "epoch": 0.10631672597864769,
      "grad_norm": 0.39122429490089417,
      "learning_rate": 0.000152964824120603,
      "loss": 1.0116,
      "step": 239
    },
    {
      "epoch": 0.10676156583629894,
      "grad_norm": 0.36022770404815674,
      "learning_rate": 0.00015276381909547739,
      "loss": 0.9501,
      "step": 240
    },
    {
      "epoch": 0.10720640569395018,
      "grad_norm": 0.3478608727455139,
      "learning_rate": 0.00015256281407035176,
      "loss": 1.0017,
      "step": 241
    },
    {
      "epoch": 0.10765124555160142,
      "grad_norm": 0.36006009578704834,
      "learning_rate": 0.00015236180904522613,
      "loss": 0.935,
      "step": 242
    },
    {
      "epoch": 0.10809608540925267,
      "grad_norm": 0.3560234606266022,
      "learning_rate": 0.0001521608040201005,
      "loss": 0.9552,
      "step": 243
    },
    {
      "epoch": 0.10854092526690391,
      "grad_norm": 0.6785101890563965,
      "learning_rate": 0.00015195979899497488,
      "loss": 0.9972,
      "step": 244
    },
    {
      "epoch": 0.10898576512455516,
      "grad_norm": 0.32202988862991333,
      "learning_rate": 0.00015175879396984925,
      "loss": 0.9763,
      "step": 245
    },
    {
      "epoch": 0.1094306049822064,
      "grad_norm": 0.33148205280303955,
      "learning_rate": 0.00015155778894472362,
      "loss": 1.0615,
      "step": 246
    },
    {
      "epoch": 0.10987544483985764,
      "grad_norm": 0.35082313418388367,
      "learning_rate": 0.000151356783919598,
      "loss": 1.0332,
      "step": 247
    },
    {
      "epoch": 0.1103202846975089,
      "grad_norm": 0.34773561358451843,
      "learning_rate": 0.00015115577889447237,
      "loss": 0.9172,
      "step": 248
    },
    {
      "epoch": 0.11076512455516015,
      "grad_norm": 0.35180824995040894,
      "learning_rate": 0.00015095477386934674,
      "loss": 1.077,
      "step": 249
    },
    {
      "epoch": 0.11120996441281139,
      "grad_norm": 0.33543917536735535,
      "learning_rate": 0.00015075376884422112,
      "loss": 0.9345,
      "step": 250
    },
    {
      "epoch": 0.11165480427046263,
      "grad_norm": 0.3516584038734436,
      "learning_rate": 0.0001505527638190955,
      "loss": 0.9481,
      "step": 251
    },
    {
      "epoch": 0.11209964412811388,
      "grad_norm": 0.3360784947872162,
      "learning_rate": 0.00015035175879396986,
      "loss": 0.9285,
      "step": 252
    },
    {
      "epoch": 0.11254448398576512,
      "grad_norm": 0.3512001633644104,
      "learning_rate": 0.00015015075376884423,
      "loss": 0.9745,
      "step": 253
    },
    {
      "epoch": 0.11298932384341637,
      "grad_norm": 0.34287190437316895,
      "learning_rate": 0.0001499497487437186,
      "loss": 1.0596,
      "step": 254
    },
    {
      "epoch": 0.11343416370106761,
      "grad_norm": 0.3830087184906006,
      "learning_rate": 0.00014974874371859298,
      "loss": 0.9534,
      "step": 255
    },
    {
      "epoch": 0.11387900355871886,
      "grad_norm": 0.3950470983982086,
      "learning_rate": 0.00014954773869346735,
      "loss": 1.045,
      "step": 256
    },
    {
      "epoch": 0.11432384341637011,
      "grad_norm": 0.5723717212677002,
      "learning_rate": 0.0001493467336683417,
      "loss": 0.8671,
      "step": 257
    },
    {
      "epoch": 0.11476868327402136,
      "grad_norm": 0.4654160737991333,
      "learning_rate": 0.0001491457286432161,
      "loss": 0.936,
      "step": 258
    },
    {
      "epoch": 0.1152135231316726,
      "grad_norm": 0.4141252338886261,
      "learning_rate": 0.00014894472361809047,
      "loss": 1.0275,
      "step": 259
    },
    {
      "epoch": 0.11565836298932385,
      "grad_norm": 0.3363032042980194,
      "learning_rate": 0.00014874371859296482,
      "loss": 0.899,
      "step": 260
    },
    {
      "epoch": 0.11610320284697509,
      "grad_norm": 0.35972562432289124,
      "learning_rate": 0.0001485427135678392,
      "loss": 0.8928,
      "step": 261
    },
    {
      "epoch": 0.11654804270462633,
      "grad_norm": 0.35416486859321594,
      "learning_rate": 0.00014834170854271356,
      "loss": 0.9557,
      "step": 262
    },
    {
      "epoch": 0.11699288256227758,
      "grad_norm": 0.3734114170074463,
      "learning_rate": 0.00014814070351758796,
      "loss": 0.956,
      "step": 263
    },
    {
      "epoch": 0.11743772241992882,
      "grad_norm": 0.3472467362880707,
      "learning_rate": 0.0001479396984924623,
      "loss": 0.9911,
      "step": 264
    },
    {
      "epoch": 0.11788256227758007,
      "grad_norm": 0.433698832988739,
      "learning_rate": 0.00014773869346733668,
      "loss": 0.9547,
      "step": 265
    },
    {
      "epoch": 0.11832740213523131,
      "grad_norm": 0.3658886253833771,
      "learning_rate": 0.00014753768844221106,
      "loss": 0.9044,
      "step": 266
    },
    {
      "epoch": 0.11877224199288257,
      "grad_norm": 0.3511579632759094,
      "learning_rate": 0.00014733668341708543,
      "loss": 0.9766,
      "step": 267
    },
    {
      "epoch": 0.11921708185053381,
      "grad_norm": 0.3257918357849121,
      "learning_rate": 0.0001471356783919598,
      "loss": 0.9641,
      "step": 268
    },
    {
      "epoch": 0.11966192170818506,
      "grad_norm": 0.3833535611629486,
      "learning_rate": 0.00014693467336683417,
      "loss": 1.0307,
      "step": 269
    },
    {
      "epoch": 0.1201067615658363,
      "grad_norm": 0.3672519326210022,
      "learning_rate": 0.00014673366834170855,
      "loss": 0.941,
      "step": 270
    },
    {
      "epoch": 0.12055160142348754,
      "grad_norm": 0.3263191282749176,
      "learning_rate": 0.00014653266331658292,
      "loss": 1.0327,
      "step": 271
    },
    {
      "epoch": 0.12099644128113879,
      "grad_norm": 0.3983098864555359,
      "learning_rate": 0.0001463316582914573,
      "loss": 1.011,
      "step": 272
    },
    {
      "epoch": 0.12144128113879003,
      "grad_norm": 0.37602588534355164,
      "learning_rate": 0.00014613065326633167,
      "loss": 1.0328,
      "step": 273
    },
    {
      "epoch": 0.12188612099644128,
      "grad_norm": 0.354548841714859,
      "learning_rate": 0.00014592964824120604,
      "loss": 1.0472,
      "step": 274
    },
    {
      "epoch": 0.12233096085409252,
      "grad_norm": 0.343363493680954,
      "learning_rate": 0.0001457286432160804,
      "loss": 1.0053,
      "step": 275
    },
    {
      "epoch": 0.12277580071174377,
      "grad_norm": 0.3958703577518463,
      "learning_rate": 0.00014552763819095479,
      "loss": 0.917,
      "step": 276
    },
    {
      "epoch": 0.12322064056939502,
      "grad_norm": 0.4580914378166199,
      "learning_rate": 0.00014532663316582916,
      "loss": 0.961,
      "step": 277
    },
    {
      "epoch": 0.12366548042704627,
      "grad_norm": 0.39822980761528015,
      "learning_rate": 0.00014512562814070353,
      "loss": 0.9276,
      "step": 278
    },
    {
      "epoch": 0.12411032028469751,
      "grad_norm": 0.3082531988620758,
      "learning_rate": 0.0001449246231155779,
      "loss": 0.9496,
      "step": 279
    },
    {
      "epoch": 0.12455516014234876,
      "grad_norm": 0.34612587094306946,
      "learning_rate": 0.00014472361809045228,
      "loss": 0.9371,
      "step": 280
    },
    {
      "epoch": 0.125,
      "grad_norm": 0.3305722177028656,
      "learning_rate": 0.00014452261306532665,
      "loss": 0.9265,
      "step": 281
    },
    {
      "epoch": 0.12544483985765126,
      "grad_norm": 0.35687321424484253,
      "learning_rate": 0.000144321608040201,
      "loss": 0.9935,
      "step": 282
    },
    {
      "epoch": 0.1258896797153025,
      "grad_norm": 0.3545040488243103,
      "learning_rate": 0.00014412060301507537,
      "loss": 0.973,
      "step": 283
    },
    {
      "epoch": 0.12633451957295375,
      "grad_norm": 0.3379552960395813,
      "learning_rate": 0.00014391959798994977,
      "loss": 1.0022,
      "step": 284
    },
    {
      "epoch": 0.12677935943060498,
      "grad_norm": 0.32599905133247375,
      "learning_rate": 0.00014371859296482411,
      "loss": 1.0497,
      "step": 285
    },
    {
      "epoch": 0.12722419928825623,
      "grad_norm": 0.38973209261894226,
      "learning_rate": 0.0001435175879396985,
      "loss": 0.9373,
      "step": 286
    },
    {
      "epoch": 0.12766903914590746,
      "grad_norm": 0.37509605288505554,
      "learning_rate": 0.00014331658291457286,
      "loss": 0.8695,
      "step": 287
    },
    {
      "epoch": 0.12811387900355872,
      "grad_norm": 0.33525556325912476,
      "learning_rate": 0.00014311557788944726,
      "loss": 0.9768,
      "step": 288
    },
    {
      "epoch": 0.12855871886120995,
      "grad_norm": 0.3496398329734802,
      "learning_rate": 0.0001429145728643216,
      "loss": 1.0044,
      "step": 289
    },
    {
      "epoch": 0.1290035587188612,
      "grad_norm": 0.39922475814819336,
      "learning_rate": 0.00014271356783919598,
      "loss": 0.9998,
      "step": 290
    },
    {
      "epoch": 0.12944839857651247,
      "grad_norm": 0.33336395025253296,
      "learning_rate": 0.00014251256281407035,
      "loss": 0.9871,
      "step": 291
    },
    {
      "epoch": 0.1298932384341637,
      "grad_norm": 0.30318179726600647,
      "learning_rate": 0.00014231155778894473,
      "loss": 0.9652,
      "step": 292
    },
    {
      "epoch": 0.13033807829181496,
      "grad_norm": 0.33315086364746094,
      "learning_rate": 0.0001421105527638191,
      "loss": 0.9793,
      "step": 293
    },
    {
      "epoch": 0.1307829181494662,
      "grad_norm": 0.33998075127601624,
      "learning_rate": 0.00014190954773869347,
      "loss": 0.945,
      "step": 294
    },
    {
      "epoch": 0.13122775800711745,
      "grad_norm": 0.3731788396835327,
      "learning_rate": 0.00014170854271356784,
      "loss": 0.9881,
      "step": 295
    },
    {
      "epoch": 0.13167259786476868,
      "grad_norm": 0.34216803312301636,
      "learning_rate": 0.00014150753768844222,
      "loss": 0.9576,
      "step": 296
    },
    {
      "epoch": 0.13211743772241993,
      "grad_norm": 0.4074036180973053,
      "learning_rate": 0.0001413065326633166,
      "loss": 1.0654,
      "step": 297
    },
    {
      "epoch": 0.13256227758007116,
      "grad_norm": 0.401727557182312,
      "learning_rate": 0.00014110552763819096,
      "loss": 0.9813,
      "step": 298
    },
    {
      "epoch": 0.13300711743772242,
      "grad_norm": 0.3025040626525879,
      "learning_rate": 0.00014090452261306534,
      "loss": 0.9491,
      "step": 299
    },
    {
      "epoch": 0.13345195729537365,
      "grad_norm": 0.3456086814403534,
      "learning_rate": 0.0001407035175879397,
      "loss": 1.102,
      "step": 300
    },
    {
      "epoch": 0.1338967971530249,
      "grad_norm": 0.36754533648490906,
      "learning_rate": 0.00014050251256281408,
      "loss": 0.9794,
      "step": 301
    },
    {
      "epoch": 0.13434163701067617,
      "grad_norm": 0.2782064378261566,
      "learning_rate": 0.00014030150753768846,
      "loss": 0.8153,
      "step": 302
    },
    {
      "epoch": 0.1347864768683274,
      "grad_norm": 0.35259565711021423,
      "learning_rate": 0.0001401005025125628,
      "loss": 1.032,
      "step": 303
    },
    {
      "epoch": 0.13523131672597866,
      "grad_norm": 0.3422435224056244,
      "learning_rate": 0.0001398994974874372,
      "loss": 0.9601,
      "step": 304
    },
    {
      "epoch": 0.1356761565836299,
      "grad_norm": 0.4378672242164612,
      "learning_rate": 0.00013969849246231157,
      "loss": 0.9241,
      "step": 305
    },
    {
      "epoch": 0.13612099644128114,
      "grad_norm": 0.33044546842575073,
      "learning_rate": 0.00013949748743718595,
      "loss": 0.9607,
      "step": 306
    },
    {
      "epoch": 0.13656583629893237,
      "grad_norm": 0.36180225014686584,
      "learning_rate": 0.0001392964824120603,
      "loss": 1.0878,
      "step": 307
    },
    {
      "epoch": 0.13701067615658363,
      "grad_norm": 0.4124760627746582,
      "learning_rate": 0.00013909547738693467,
      "loss": 0.9698,
      "step": 308
    },
    {
      "epoch": 0.13745551601423486,
      "grad_norm": 0.33320683240890503,
      "learning_rate": 0.00013889447236180907,
      "loss": 0.869,
      "step": 309
    },
    {
      "epoch": 0.13790035587188612,
      "grad_norm": 0.3595818877220154,
      "learning_rate": 0.0001386934673366834,
      "loss": 1.0227,
      "step": 310
    },
    {
      "epoch": 0.13834519572953738,
      "grad_norm": 0.35221633315086365,
      "learning_rate": 0.00013849246231155778,
      "loss": 0.9576,
      "step": 311
    },
    {
      "epoch": 0.1387900355871886,
      "grad_norm": 0.3357265889644623,
      "learning_rate": 0.00013829145728643216,
      "loss": 0.9877,
      "step": 312
    },
    {
      "epoch": 0.13923487544483987,
      "grad_norm": 0.3630138039588928,
      "learning_rate": 0.00013809045226130656,
      "loss": 0.9918,
      "step": 313
    },
    {
      "epoch": 0.1396797153024911,
      "grad_norm": 0.3503361642360687,
      "learning_rate": 0.0001378894472361809,
      "loss": 1.0585,
      "step": 314
    },
    {
      "epoch": 0.14012455516014236,
      "grad_norm": 0.33437854051589966,
      "learning_rate": 0.00013768844221105528,
      "loss": 1.1292,
      "step": 315
    },
    {
      "epoch": 0.14056939501779359,
      "grad_norm": 0.3571662902832031,
      "learning_rate": 0.00013748743718592965,
      "loss": 1.0084,
      "step": 316
    },
    {
      "epoch": 0.14101423487544484,
      "grad_norm": 0.3780195415019989,
      "learning_rate": 0.00013728643216080402,
      "loss": 0.98,
      "step": 317
    },
    {
      "epoch": 0.14145907473309607,
      "grad_norm": 0.3566683232784271,
      "learning_rate": 0.0001370854271356784,
      "loss": 0.9694,
      "step": 318
    },
    {
      "epoch": 0.14190391459074733,
      "grad_norm": 0.36311331391334534,
      "learning_rate": 0.00013688442211055277,
      "loss": 0.8768,
      "step": 319
    },
    {
      "epoch": 0.1423487544483986,
      "grad_norm": 0.3234863877296448,
      "learning_rate": 0.00013668341708542714,
      "loss": 0.9384,
      "step": 320
    },
    {
      "epoch": 0.14279359430604982,
      "grad_norm": 0.3360319137573242,
      "learning_rate": 0.00013648241206030151,
      "loss": 0.9831,
      "step": 321
    },
    {
      "epoch": 0.14323843416370108,
      "grad_norm": 0.3335811197757721,
      "learning_rate": 0.0001362814070351759,
      "loss": 0.9541,
      "step": 322
    },
    {
      "epoch": 0.1436832740213523,
      "grad_norm": 0.40138107538223267,
      "learning_rate": 0.00013608040201005026,
      "loss": 0.9647,
      "step": 323
    },
    {
      "epoch": 0.14412811387900357,
      "grad_norm": 0.36284399032592773,
      "learning_rate": 0.00013587939698492463,
      "loss": 0.9763,
      "step": 324
    },
    {
      "epoch": 0.1445729537366548,
      "grad_norm": 0.29980310797691345,
      "learning_rate": 0.000135678391959799,
      "loss": 0.9773,
      "step": 325
    },
    {
      "epoch": 0.14501779359430605,
      "grad_norm": 0.37920883297920227,
      "learning_rate": 0.00013547738693467338,
      "loss": 1.0024,
      "step": 326
    },
    {
      "epoch": 0.14546263345195729,
      "grad_norm": 0.32456889748573303,
      "learning_rate": 0.00013527638190954775,
      "loss": 0.995,
      "step": 327
    },
    {
      "epoch": 0.14590747330960854,
      "grad_norm": 0.368257999420166,
      "learning_rate": 0.0001350753768844221,
      "loss": 1.0205,
      "step": 328
    },
    {
      "epoch": 0.14635231316725977,
      "grad_norm": 0.37282678484916687,
      "learning_rate": 0.00013487437185929647,
      "loss": 0.8557,
      "step": 329
    },
    {
      "epoch": 0.14679715302491103,
      "grad_norm": 0.34274178743362427,
      "learning_rate": 0.00013467336683417087,
      "loss": 0.8833,
      "step": 330
    },
    {
      "epoch": 0.1472419928825623,
      "grad_norm": 0.34231486916542053,
      "learning_rate": 0.00013447236180904524,
      "loss": 0.9529,
      "step": 331
    },
    {
      "epoch": 0.14768683274021352,
      "grad_norm": 0.38436999917030334,
      "learning_rate": 0.0001342713567839196,
      "loss": 1.029,
      "step": 332
    },
    {
      "epoch": 0.14813167259786478,
      "grad_norm": 0.400931179523468,
      "learning_rate": 0.00013407035175879396,
      "loss": 0.9913,
      "step": 333
    },
    {
      "epoch": 0.148576512455516,
      "grad_norm": 0.32551515102386475,
      "learning_rate": 0.00013386934673366836,
      "loss": 0.9879,
      "step": 334
    },
    {
      "epoch": 0.14902135231316727,
      "grad_norm": 0.35669583082199097,
      "learning_rate": 0.0001336683417085427,
      "loss": 1.0058,
      "step": 335
    },
    {
      "epoch": 0.1494661921708185,
      "grad_norm": 0.36875060200691223,
      "learning_rate": 0.00013346733668341708,
      "loss": 0.9858,
      "step": 336
    },
    {
      "epoch": 0.14991103202846975,
      "grad_norm": 0.36278796195983887,
      "learning_rate": 0.00013326633165829146,
      "loss": 1.097,
      "step": 337
    },
    {
      "epoch": 0.15035587188612098,
      "grad_norm": 0.31908801198005676,
      "learning_rate": 0.00013306532663316586,
      "loss": 0.955,
      "step": 338
    },
    {
      "epoch": 0.15080071174377224,
      "grad_norm": 0.38954317569732666,
      "learning_rate": 0.0001328643216080402,
      "loss": 0.9402,
      "step": 339
    },
    {
      "epoch": 0.1512455516014235,
      "grad_norm": 0.3285794258117676,
      "learning_rate": 0.00013266331658291457,
      "loss": 0.9327,
      "step": 340
    },
    {
      "epoch": 0.15169039145907473,
      "grad_norm": 0.4542711675167084,
      "learning_rate": 0.00013246231155778895,
      "loss": 0.7939,
      "step": 341
    },
    {
      "epoch": 0.152135231316726,
      "grad_norm": 0.3730446696281433,
      "learning_rate": 0.00013226130653266332,
      "loss": 1.0014,
      "step": 342
    },
    {
      "epoch": 0.15258007117437722,
      "grad_norm": 0.3543410301208496,
      "learning_rate": 0.0001320603015075377,
      "loss": 0.9617,
      "step": 343
    },
    {
      "epoch": 0.15302491103202848,
      "grad_norm": 0.32625502347946167,
      "learning_rate": 0.00013185929648241207,
      "loss": 0.9975,
      "step": 344
    },
    {
      "epoch": 0.1534697508896797,
      "grad_norm": 0.39967095851898193,
      "learning_rate": 0.00013165829145728644,
      "loss": 0.8421,
      "step": 345
    },
    {
      "epoch": 0.15391459074733096,
      "grad_norm": 0.36405855417251587,
      "learning_rate": 0.0001314572864321608,
      "loss": 1.0338,
      "step": 346
    },
    {
      "epoch": 0.1543594306049822,
      "grad_norm": 0.32427796721458435,
      "learning_rate": 0.00013125628140703518,
      "loss": 0.971,
      "step": 347
    },
    {
      "epoch": 0.15480427046263345,
      "grad_norm": 0.3405332565307617,
      "learning_rate": 0.00013105527638190956,
      "loss": 0.9974,
      "step": 348
    },
    {
      "epoch": 0.1552491103202847,
      "grad_norm": 0.3611313998699188,
      "learning_rate": 0.00013085427135678393,
      "loss": 1.0026,
      "step": 349
    },
    {
      "epoch": 0.15569395017793594,
      "grad_norm": 0.3901672661304474,
      "learning_rate": 0.0001306532663316583,
      "loss": 0.924,
      "step": 350
    },
    {
      "epoch": 0.1561387900355872,
      "grad_norm": 0.38940900564193726,
      "learning_rate": 0.00013045226130653268,
      "loss": 1.0827,
      "step": 351
    },
    {
      "epoch": 0.15658362989323843,
      "grad_norm": 0.3226456046104431,
      "learning_rate": 0.00013025125628140705,
      "loss": 0.9063,
      "step": 352
    },
    {
      "epoch": 0.1570284697508897,
      "grad_norm": 0.2980092763900757,
      "learning_rate": 0.0001300502512562814,
      "loss": 0.9688,
      "step": 353
    },
    {
      "epoch": 0.15747330960854092,
      "grad_norm": 0.3783577084541321,
      "learning_rate": 0.00012984924623115577,
      "loss": 1.0825,
      "step": 354
    },
    {
      "epoch": 0.15791814946619218,
      "grad_norm": 0.3832230269908905,
      "learning_rate": 0.00012964824120603017,
      "loss": 1.0627,
      "step": 355
    },
    {
      "epoch": 0.1583629893238434,
      "grad_norm": 0.34628695249557495,
      "learning_rate": 0.00012944723618090454,
      "loss": 0.8811,
      "step": 356
    },
    {
      "epoch": 0.15880782918149466,
      "grad_norm": 0.43190130591392517,
      "learning_rate": 0.0001292462311557789,
      "loss": 1.0152,
      "step": 357
    },
    {
      "epoch": 0.1592526690391459,
      "grad_norm": 0.36563345789909363,
      "learning_rate": 0.00012904522613065326,
      "loss": 0.9848,
      "step": 358
    },
    {
      "epoch": 0.15969750889679715,
      "grad_norm": 0.2992965281009674,
      "learning_rate": 0.00012884422110552766,
      "loss": 0.9507,
      "step": 359
    },
    {
      "epoch": 0.1601423487544484,
      "grad_norm": 0.3296932876110077,
      "learning_rate": 0.000128643216080402,
      "loss": 1.1,
      "step": 360
    },
    {
      "epoch": 0.16058718861209964,
      "grad_norm": 0.41810277104377747,
      "learning_rate": 0.00012844221105527638,
      "loss": 0.8272,
      "step": 361
    },
    {
      "epoch": 0.1610320284697509,
      "grad_norm": 0.41459906101226807,
      "learning_rate": 0.00012824120603015075,
      "loss": 1.0549,
      "step": 362
    },
    {
      "epoch": 0.16147686832740213,
      "grad_norm": 0.35844656825065613,
      "learning_rate": 0.00012804020100502515,
      "loss": 1.0996,
      "step": 363
    },
    {
      "epoch": 0.1619217081850534,
      "grad_norm": 0.31563690304756165,
      "learning_rate": 0.0001278391959798995,
      "loss": 0.8873,
      "step": 364
    },
    {
      "epoch": 0.16236654804270462,
      "grad_norm": 0.3521721065044403,
      "learning_rate": 0.00012763819095477387,
      "loss": 0.8421,
      "step": 365
    },
    {
      "epoch": 0.16281138790035588,
      "grad_norm": 0.3149266839027405,
      "learning_rate": 0.00012743718592964824,
      "loss": 0.9538,
      "step": 366
    },
    {
      "epoch": 0.1632562277580071,
      "grad_norm": 0.36552533507347107,
      "learning_rate": 0.00012723618090452262,
      "loss": 0.9734,
      "step": 367
    },
    {
      "epoch": 0.16370106761565836,
      "grad_norm": 0.33934077620506287,
      "learning_rate": 0.000127035175879397,
      "loss": 1.0199,
      "step": 368
    },
    {
      "epoch": 0.16414590747330962,
      "grad_norm": 0.4329892098903656,
      "learning_rate": 0.00012683417085427136,
      "loss": 0.872,
      "step": 369
    },
    {
      "epoch": 0.16459074733096085,
      "grad_norm": 0.4003223776817322,
      "learning_rate": 0.00012663316582914574,
      "loss": 1.0864,
      "step": 370
    },
    {
      "epoch": 0.1650355871886121,
      "grad_norm": 0.37618136405944824,
      "learning_rate": 0.0001264321608040201,
      "loss": 0.9797,
      "step": 371
    },
    {
      "epoch": 0.16548042704626334,
      "grad_norm": 0.30325135588645935,
      "learning_rate": 0.00012623115577889448,
      "loss": 0.8896,
      "step": 372
    },
    {
      "epoch": 0.1659252669039146,
      "grad_norm": 0.42738157510757446,
      "learning_rate": 0.00012603015075376885,
      "loss": 1.0881,
      "step": 373
    },
    {
      "epoch": 0.16637010676156583,
      "grad_norm": 0.38146787881851196,
      "learning_rate": 0.00012582914572864323,
      "loss": 0.9281,
      "step": 374
    },
    {
      "epoch": 0.16681494661921709,
      "grad_norm": 0.40179941058158875,
      "learning_rate": 0.0001256281407035176,
      "loss": 0.9364,
      "step": 375
    },
    {
      "epoch": 0.16725978647686832,
      "grad_norm": 0.33039966225624084,
      "learning_rate": 0.00012542713567839197,
      "loss": 0.9216,
      "step": 376
    },
    {
      "epoch": 0.16770462633451957,
      "grad_norm": 0.3515557050704956,
      "learning_rate": 0.00012522613065326635,
      "loss": 1.0472,
      "step": 377
    },
    {
      "epoch": 0.16814946619217083,
      "grad_norm": 0.3095014691352844,
      "learning_rate": 0.0001250251256281407,
      "loss": 0.9153,
      "step": 378
    },
    {
      "epoch": 0.16859430604982206,
      "grad_norm": 0.42122480273246765,
      "learning_rate": 0.00012482412060301507,
      "loss": 1.0208,
      "step": 379
    },
    {
      "epoch": 0.16903914590747332,
      "grad_norm": 0.3838384747505188,
      "learning_rate": 0.00012462311557788947,
      "loss": 0.9936,
      "step": 380
    },
    {
      "epoch": 0.16948398576512455,
      "grad_norm": 0.38645121455192566,
      "learning_rate": 0.00012442211055276384,
      "loss": 0.983,
      "step": 381
    },
    {
      "epoch": 0.1699288256227758,
      "grad_norm": 0.34789687395095825,
      "learning_rate": 0.00012422110552763818,
      "loss": 0.9341,
      "step": 382
    },
    {
      "epoch": 0.17037366548042704,
      "grad_norm": 0.3516675531864166,
      "learning_rate": 0.00012402010050251256,
      "loss": 1.052,
      "step": 383
    },
    {
      "epoch": 0.1708185053380783,
      "grad_norm": 0.36350032687187195,
      "learning_rate": 0.00012381909547738696,
      "loss": 1.047,
      "step": 384
    },
    {
      "epoch": 0.17126334519572953,
      "grad_norm": 0.3919387757778168,
      "learning_rate": 0.0001236180904522613,
      "loss": 0.9899,
      "step": 385
    },
    {
      "epoch": 0.17170818505338079,
      "grad_norm": 0.3369230628013611,
      "learning_rate": 0.00012341708542713568,
      "loss": 0.981,
      "step": 386
    },
    {
      "epoch": 0.17215302491103202,
      "grad_norm": 0.34190815687179565,
      "learning_rate": 0.00012321608040201005,
      "loss": 0.9727,
      "step": 387
    },
    {
      "epoch": 0.17259786476868327,
      "grad_norm": 0.3579177260398865,
      "learning_rate": 0.00012301507537688445,
      "loss": 1.0071,
      "step": 388
    },
    {
      "epoch": 0.17304270462633453,
      "grad_norm": 0.307522714138031,
      "learning_rate": 0.0001228140703517588,
      "loss": 1.0614,
      "step": 389
    },
    {
      "epoch": 0.17348754448398576,
      "grad_norm": 0.3733164966106415,
      "learning_rate": 0.00012261306532663317,
      "loss": 1.1578,
      "step": 390
    },
    {
      "epoch": 0.17393238434163702,
      "grad_norm": 0.3878190517425537,
      "learning_rate": 0.00012241206030150754,
      "loss": 0.9454,
      "step": 391
    },
    {
      "epoch": 0.17437722419928825,
      "grad_norm": 0.37619033455848694,
      "learning_rate": 0.00012221105527638191,
      "loss": 0.9608,
      "step": 392
    },
    {
      "epoch": 0.1748220640569395,
      "grad_norm": 0.3204808533191681,
      "learning_rate": 0.00012201005025125629,
      "loss": 1.0344,
      "step": 393
    },
    {
      "epoch": 0.17526690391459074,
      "grad_norm": 0.3382740616798401,
      "learning_rate": 0.00012180904522613066,
      "loss": 0.9624,
      "step": 394
    },
    {
      "epoch": 0.175711743772242,
      "grad_norm": 0.3102628290653229,
      "learning_rate": 0.00012160804020100502,
      "loss": 0.8531,
      "step": 395
    },
    {
      "epoch": 0.17615658362989323,
      "grad_norm": 0.32901301980018616,
      "learning_rate": 0.00012140703517587942,
      "loss": 0.9773,
      "step": 396
    },
    {
      "epoch": 0.17660142348754448,
      "grad_norm": 0.3148498833179474,
      "learning_rate": 0.00012120603015075378,
      "loss": 0.9834,
      "step": 397
    },
    {
      "epoch": 0.17704626334519574,
      "grad_norm": 0.34393250942230225,
      "learning_rate": 0.00012100502512562815,
      "loss": 0.8226,
      "step": 398
    },
    {
      "epoch": 0.17749110320284697,
      "grad_norm": 0.415526807308197,
      "learning_rate": 0.00012080402010050251,
      "loss": 0.9692,
      "step": 399
    },
    {
      "epoch": 0.17793594306049823,
      "grad_norm": 0.34324660897254944,
      "learning_rate": 0.00012060301507537688,
      "loss": 1.0135,
      "step": 400
    },
    {
      "epoch": 0.17838078291814946,
      "grad_norm": 0.3012760579586029,
      "learning_rate": 0.00012040201005025127,
      "loss": 0.9889,
      "step": 401
    },
    {
      "epoch": 0.17882562277580072,
      "grad_norm": 0.3476797342300415,
      "learning_rate": 0.00012020100502512563,
      "loss": 0.9328,
      "step": 402
    },
    {
      "epoch": 0.17927046263345195,
      "grad_norm": 0.36807379126548767,
      "learning_rate": 0.00012,
      "loss": 1.0193,
      "step": 403
    },
    {
      "epoch": 0.1797153024911032,
      "grad_norm": 0.3321215808391571,
      "learning_rate": 0.00011979899497487436,
      "loss": 0.9197,
      "step": 404
    },
    {
      "epoch": 0.18016014234875444,
      "grad_norm": 0.48590320348739624,
      "learning_rate": 0.00011959798994974876,
      "loss": 0.8509,
      "step": 405
    },
    {
      "epoch": 0.1806049822064057,
      "grad_norm": 0.3456606864929199,
      "learning_rate": 0.00011939698492462312,
      "loss": 1.033,
      "step": 406
    },
    {
      "epoch": 0.18104982206405695,
      "grad_norm": 0.3836311101913452,
      "learning_rate": 0.0001191959798994975,
      "loss": 0.9396,
      "step": 407
    },
    {
      "epoch": 0.18149466192170818,
      "grad_norm": 0.3805595338344574,
      "learning_rate": 0.00011899497487437185,
      "loss": 0.9135,
      "step": 408
    },
    {
      "epoch": 0.18193950177935944,
      "grad_norm": 0.34188127517700195,
      "learning_rate": 0.00011879396984924624,
      "loss": 0.9551,
      "step": 409
    },
    {
      "epoch": 0.18238434163701067,
      "grad_norm": 0.40114229917526245,
      "learning_rate": 0.00011859296482412061,
      "loss": 0.93,
      "step": 410
    },
    {
      "epoch": 0.18282918149466193,
      "grad_norm": 0.3679947555065155,
      "learning_rate": 0.00011839195979899497,
      "loss": 1.0823,
      "step": 411
    },
    {
      "epoch": 0.18327402135231316,
      "grad_norm": 0.4190071225166321,
      "learning_rate": 0.00011819095477386935,
      "loss": 0.8648,
      "step": 412
    },
    {
      "epoch": 0.18371886120996442,
      "grad_norm": 0.39405253529548645,
      "learning_rate": 0.00011798994974874373,
      "loss": 1.0682,
      "step": 413
    },
    {
      "epoch": 0.18416370106761565,
      "grad_norm": 0.33679550886154175,
      "learning_rate": 0.0001177889447236181,
      "loss": 0.9674,
      "step": 414
    },
    {
      "epoch": 0.1846085409252669,
      "grad_norm": 0.3880975842475891,
      "learning_rate": 0.00011758793969849247,
      "loss": 0.9367,
      "step": 415
    },
    {
      "epoch": 0.18505338078291814,
      "grad_norm": 0.4066067337989807,
      "learning_rate": 0.00011738693467336684,
      "loss": 0.9712,
      "step": 416
    },
    {
      "epoch": 0.1854982206405694,
      "grad_norm": 0.48199501633644104,
      "learning_rate": 0.00011718592964824122,
      "loss": 0.9133,
      "step": 417
    },
    {
      "epoch": 0.18594306049822065,
      "grad_norm": 0.34463343024253845,
      "learning_rate": 0.00011698492462311558,
      "loss": 0.9384,
      "step": 418
    },
    {
      "epoch": 0.18638790035587188,
      "grad_norm": 0.4196937680244446,
      "learning_rate": 0.00011678391959798996,
      "loss": 0.9803,
      "step": 419
    },
    {
      "epoch": 0.18683274021352314,
      "grad_norm": 0.3185271620750427,
      "learning_rate": 0.00011658291457286432,
      "loss": 0.9267,
      "step": 420
    },
    {
      "epoch": 0.18727758007117437,
      "grad_norm": 0.36499395966529846,
      "learning_rate": 0.00011638190954773872,
      "loss": 0.9721,
      "step": 421
    },
    {
      "epoch": 0.18772241992882563,
      "grad_norm": 0.3580315411090851,
      "learning_rate": 0.00011618090452261308,
      "loss": 0.8834,
      "step": 422
    },
    {
      "epoch": 0.18816725978647686,
      "grad_norm": 0.38039615750312805,
      "learning_rate": 0.00011597989949748745,
      "loss": 0.9638,
      "step": 423
    },
    {
      "epoch": 0.18861209964412812,
      "grad_norm": 0.38077226281166077,
      "learning_rate": 0.00011577889447236181,
      "loss": 0.9077,
      "step": 424
    },
    {
      "epoch": 0.18905693950177935,
      "grad_norm": 0.33230000734329224,
      "learning_rate": 0.00011557788944723618,
      "loss": 1.0115,
      "step": 425
    },
    {
      "epoch": 0.1895017793594306,
      "grad_norm": 0.41263043880462646,
      "learning_rate": 0.00011537688442211057,
      "loss": 0.9162,
      "step": 426
    },
    {
      "epoch": 0.18994661921708186,
      "grad_norm": 0.32407188415527344,
      "learning_rate": 0.00011517587939698493,
      "loss": 0.9618,
      "step": 427
    },
    {
      "epoch": 0.1903914590747331,
      "grad_norm": 0.4661211669445038,
      "learning_rate": 0.0001149748743718593,
      "loss": 0.931,
      "step": 428
    },
    {
      "epoch": 0.19083629893238435,
      "grad_norm": 0.37407800555229187,
      "learning_rate": 0.00011477386934673366,
      "loss": 1.0046,
      "step": 429
    },
    {
      "epoch": 0.19128113879003558,
      "grad_norm": 0.3580109477043152,
      "learning_rate": 0.00011457286432160806,
      "loss": 1.0114,
      "step": 430
    },
    {
      "epoch": 0.19172597864768684,
      "grad_norm": 0.42048606276512146,
      "learning_rate": 0.00011437185929648242,
      "loss": 0.9468,
      "step": 431
    },
    {
      "epoch": 0.19217081850533807,
      "grad_norm": 0.38913312554359436,
      "learning_rate": 0.00011417085427135679,
      "loss": 1.1037,
      "step": 432
    },
    {
      "epoch": 0.19261565836298933,
      "grad_norm": 0.35675281286239624,
      "learning_rate": 0.00011396984924623115,
      "loss": 0.9292,
      "step": 433
    },
    {
      "epoch": 0.19306049822064056,
      "grad_norm": 0.3579562306404114,
      "learning_rate": 0.00011376884422110554,
      "loss": 0.9413,
      "step": 434
    },
    {
      "epoch": 0.19350533807829182,
      "grad_norm": 0.3751574456691742,
      "learning_rate": 0.00011356783919597991,
      "loss": 1.0018,
      "step": 435
    },
    {
      "epoch": 0.19395017793594305,
      "grad_norm": 0.3086400032043457,
      "learning_rate": 0.00011336683417085427,
      "loss": 0.9355,
      "step": 436
    },
    {
      "epoch": 0.1943950177935943,
      "grad_norm": 0.41924887895584106,
      "learning_rate": 0.00011316582914572864,
      "loss": 0.942,
      "step": 437
    },
    {
      "epoch": 0.19483985765124556,
      "grad_norm": 0.3349624276161194,
      "learning_rate": 0.00011296482412060303,
      "loss": 1.0232,
      "step": 438
    },
    {
      "epoch": 0.1952846975088968,
      "grad_norm": 0.4605553448200226,
      "learning_rate": 0.0001127638190954774,
      "loss": 1.0958,
      "step": 439
    },
    {
      "epoch": 0.19572953736654805,
      "grad_norm": 0.3877868950366974,
      "learning_rate": 0.00011256281407035176,
      "loss": 0.9227,
      "step": 440
    },
    {
      "epoch": 0.19617437722419928,
      "grad_norm": 0.3474135100841522,
      "learning_rate": 0.00011236180904522614,
      "loss": 0.8446,
      "step": 441
    },
    {
      "epoch": 0.19661921708185054,
      "grad_norm": 0.3451891541481018,
      "learning_rate": 0.00011216080402010052,
      "loss": 0.9641,
      "step": 442
    },
    {
      "epoch": 0.19706405693950177,
      "grad_norm": 0.3565372824668884,
      "learning_rate": 0.00011195979899497488,
      "loss": 0.9807,
      "step": 443
    },
    {
      "epoch": 0.19750889679715303,
      "grad_norm": 0.3283828794956207,
      "learning_rate": 0.00011175879396984925,
      "loss": 0.9949,
      "step": 444
    },
    {
      "epoch": 0.19795373665480426,
      "grad_norm": 0.3562460243701935,
      "learning_rate": 0.00011155778894472361,
      "loss": 0.9807,
      "step": 445
    },
    {
      "epoch": 0.19839857651245552,
      "grad_norm": 0.33841797709465027,
      "learning_rate": 0.00011135678391959799,
      "loss": 1.0213,
      "step": 446
    },
    {
      "epoch": 0.19884341637010677,
      "grad_norm": 0.3265354633331299,
      "learning_rate": 0.00011115577889447237,
      "loss": 0.9138,
      "step": 447
    },
    {
      "epoch": 0.199288256227758,
      "grad_norm": 0.35441333055496216,
      "learning_rate": 0.00011095477386934675,
      "loss": 0.8982,
      "step": 448
    },
    {
      "epoch": 0.19973309608540926,
      "grad_norm": 0.3564106822013855,
      "learning_rate": 0.0001107537688442211,
      "loss": 1.0298,
      "step": 449
    },
    {
      "epoch": 0.2001779359430605,
      "grad_norm": 0.510447084903717,
      "learning_rate": 0.00011055276381909548,
      "loss": 0.818,
      "step": 450
    },
    {
      "epoch": 0.20062277580071175,
      "grad_norm": 0.3568657338619232,
      "learning_rate": 0.00011035175879396986,
      "loss": 0.9187,
      "step": 451
    },
    {
      "epoch": 0.20106761565836298,
      "grad_norm": 0.40208712220191956,
      "learning_rate": 0.00011015075376884422,
      "loss": 1.0041,
      "step": 452
    },
    {
      "epoch": 0.20151245551601424,
      "grad_norm": 0.36585667729377747,
      "learning_rate": 0.0001099497487437186,
      "loss": 0.9413,
      "step": 453
    },
    {
      "epoch": 0.20195729537366547,
      "grad_norm": 0.36290839314460754,
      "learning_rate": 0.00010974874371859296,
      "loss": 1.0084,
      "step": 454
    },
    {
      "epoch": 0.20240213523131673,
      "grad_norm": 0.34766581654548645,
      "learning_rate": 0.00010954773869346736,
      "loss": 1.0761,
      "step": 455
    },
    {
      "epoch": 0.20284697508896798,
      "grad_norm": 0.3324412703514099,
      "learning_rate": 0.00010934673366834172,
      "loss": 0.9078,
      "step": 456
    },
    {
      "epoch": 0.20329181494661921,
      "grad_norm": 0.35148242115974426,
      "learning_rate": 0.00010914572864321609,
      "loss": 0.9148,
      "step": 457
    },
    {
      "epoch": 0.20373665480427047,
      "grad_norm": 0.36268171668052673,
      "learning_rate": 0.00010894472361809045,
      "loss": 0.9915,
      "step": 458
    },
    {
      "epoch": 0.2041814946619217,
      "grad_norm": 0.3343249261379242,
      "learning_rate": 0.00010874371859296483,
      "loss": 0.9596,
      "step": 459
    },
    {
      "epoch": 0.20462633451957296,
      "grad_norm": 0.3835364878177643,
      "learning_rate": 0.00010854271356783921,
      "loss": 0.9718,
      "step": 460
    },
    {
      "epoch": 0.2050711743772242,
      "grad_norm": 0.3455188274383545,
      "learning_rate": 0.00010834170854271357,
      "loss": 0.9918,
      "step": 461
    },
    {
      "epoch": 0.20551601423487545,
      "grad_norm": 0.3390166759490967,
      "learning_rate": 0.00010814070351758794,
      "loss": 0.9718,
      "step": 462
    },
    {
      "epoch": 0.20596085409252668,
      "grad_norm": 0.40748950839042664,
      "learning_rate": 0.00010793969849246233,
      "loss": 0.9766,
      "step": 463
    },
    {
      "epoch": 0.20640569395017794,
      "grad_norm": 0.41326215863227844,
      "learning_rate": 0.0001077386934673367,
      "loss": 0.9639,
      "step": 464
    },
    {
      "epoch": 0.20685053380782917,
      "grad_norm": 0.36672449111938477,
      "learning_rate": 0.00010753768844221106,
      "loss": 1.0027,
      "step": 465
    },
    {
      "epoch": 0.20729537366548043,
      "grad_norm": 0.3668520152568817,
      "learning_rate": 0.00010733668341708543,
      "loss": 0.9147,
      "step": 466
    },
    {
      "epoch": 0.20774021352313168,
      "grad_norm": 0.40608394145965576,
      "learning_rate": 0.00010713567839195982,
      "loss": 0.973,
      "step": 467
    },
    {
      "epoch": 0.20818505338078291,
      "grad_norm": 0.3467264175415039,
      "learning_rate": 0.00010693467336683418,
      "loss": 1.0182,
      "step": 468
    },
    {
      "epoch": 0.20862989323843417,
      "grad_norm": 0.3724631071090698,
      "learning_rate": 0.00010673366834170855,
      "loss": 0.9269,
      "step": 469
    },
    {
      "epoch": 0.2090747330960854,
      "grad_norm": 0.4459652006626129,
      "learning_rate": 0.00010653266331658291,
      "loss": 1.115,
      "step": 470
    },
    {
      "epoch": 0.20951957295373666,
      "grad_norm": 0.3838179111480713,
      "learning_rate": 0.00010633165829145728,
      "loss": 0.9367,
      "step": 471
    },
    {
      "epoch": 0.2099644128113879,
      "grad_norm": 0.3366687595844269,
      "learning_rate": 0.00010613065326633167,
      "loss": 1.0043,
      "step": 472
    },
    {
      "epoch": 0.21040925266903915,
      "grad_norm": 0.39886242151260376,
      "learning_rate": 0.00010592964824120604,
      "loss": 0.9596,
      "step": 473
    },
    {
      "epoch": 0.21085409252669038,
      "grad_norm": 0.36448466777801514,
      "learning_rate": 0.0001057286432160804,
      "loss": 0.874,
      "step": 474
    },
    {
      "epoch": 0.21129893238434164,
      "grad_norm": 0.36318713426589966,
      "learning_rate": 0.00010552763819095478,
      "loss": 0.8955,
      "step": 475
    },
    {
      "epoch": 0.2117437722419929,
      "grad_norm": 0.41962435841560364,
      "learning_rate": 0.00010532663316582916,
      "loss": 1.0139,
      "step": 476
    },
    {
      "epoch": 0.21218861209964412,
      "grad_norm": 0.34656140208244324,
      "learning_rate": 0.00010512562814070352,
      "loss": 0.9768,
      "step": 477
    },
    {
      "epoch": 0.21263345195729538,
      "grad_norm": 0.36636775732040405,
      "learning_rate": 0.0001049246231155779,
      "loss": 0.9826,
      "step": 478
    },
    {
      "epoch": 0.2130782918149466,
      "grad_norm": 0.3224871754646301,
      "learning_rate": 0.00010472361809045225,
      "loss": 0.9229,
      "step": 479
    },
    {
      "epoch": 0.21352313167259787,
      "grad_norm": 0.33149847388267517,
      "learning_rate": 0.00010452261306532664,
      "loss": 0.9374,
      "step": 480
    },
    {
      "epoch": 0.2139679715302491,
      "grad_norm": 0.356975793838501,
      "learning_rate": 0.00010432160804020101,
      "loss": 0.8682,
      "step": 481
    },
    {
      "epoch": 0.21441281138790036,
      "grad_norm": 0.3753889799118042,
      "learning_rate": 0.00010412060301507539,
      "loss": 0.961,
      "step": 482
    },
    {
      "epoch": 0.2148576512455516,
      "grad_norm": 0.3479616641998291,
      "learning_rate": 0.00010391959798994975,
      "loss": 0.9423,
      "step": 483
    },
    {
      "epoch": 0.21530249110320285,
      "grad_norm": 0.4025513529777527,
      "learning_rate": 0.00010371859296482413,
      "loss": 0.9917,
      "step": 484
    },
    {
      "epoch": 0.2157473309608541,
      "grad_norm": 0.3388558328151703,
      "learning_rate": 0.0001035175879396985,
      "loss": 0.994,
      "step": 485
    },
    {
      "epoch": 0.21619217081850534,
      "grad_norm": 0.3347098231315613,
      "learning_rate": 0.00010331658291457286,
      "loss": 0.9796,
      "step": 486
    },
    {
      "epoch": 0.2166370106761566,
      "grad_norm": 0.29469332098960876,
      "learning_rate": 0.00010311557788944724,
      "loss": 0.9296,
      "step": 487
    },
    {
      "epoch": 0.21708185053380782,
      "grad_norm": 0.4186575412750244,
      "learning_rate": 0.00010291457286432162,
      "loss": 1.0021,
      "step": 488
    },
    {
      "epoch": 0.21752669039145908,
      "grad_norm": 0.3087356388568878,
      "learning_rate": 0.00010271356783919598,
      "loss": 0.968,
      "step": 489
    },
    {
      "epoch": 0.2179715302491103,
      "grad_norm": 0.3883945643901825,
      "learning_rate": 0.00010251256281407036,
      "loss": 1.1866,
      "step": 490
    },
    {
      "epoch": 0.21841637010676157,
      "grad_norm": 0.31850650906562805,
      "learning_rate": 0.00010231155778894473,
      "loss": 0.9544,
      "step": 491
    },
    {
      "epoch": 0.2188612099644128,
      "grad_norm": 0.40497350692749023,
      "learning_rate": 0.00010211055276381909,
      "loss": 1.0134,
      "step": 492
    },
    {
      "epoch": 0.21930604982206406,
      "grad_norm": 0.31457439064979553,
      "learning_rate": 0.00010190954773869348,
      "loss": 0.8261,
      "step": 493
    },
    {
      "epoch": 0.2197508896797153,
      "grad_norm": 0.398622065782547,
      "learning_rate": 0.00010170854271356785,
      "loss": 1.0384,
      "step": 494
    },
    {
      "epoch": 0.22019572953736655,
      "grad_norm": 0.30925434827804565,
      "learning_rate": 0.00010150753768844221,
      "loss": 0.9075,
      "step": 495
    },
    {
      "epoch": 0.2206405693950178,
      "grad_norm": 0.3536154329776764,
      "learning_rate": 0.00010130653266331658,
      "loss": 0.9856,
      "step": 496
    },
    {
      "epoch": 0.22108540925266904,
      "grad_norm": 0.3990980386734009,
      "learning_rate": 0.00010110552763819097,
      "loss": 0.9822,
      "step": 497
    },
    {
      "epoch": 0.2215302491103203,
      "grad_norm": 0.3845369517803192,
      "learning_rate": 0.00010090452261306533,
      "loss": 0.9191,
      "step": 498
    },
    {
      "epoch": 0.22197508896797152,
      "grad_norm": 0.45938462018966675,
      "learning_rate": 0.0001007035175879397,
      "loss": 0.994,
      "step": 499
    },
    {
      "epoch": 0.22241992882562278,
      "grad_norm": 0.32502633333206177,
      "learning_rate": 0.00010050251256281407,
      "loss": 0.9397,
      "step": 500
    },
    {
      "epoch": 0.222864768683274,
      "grad_norm": 0.3375188410282135,
      "learning_rate": 0.00010030150753768846,
      "loss": 0.909,
      "step": 501
    },
    {
      "epoch": 0.22330960854092527,
      "grad_norm": 0.31290072202682495,
      "learning_rate": 0.00010010050251256282,
      "loss": 0.9114,
      "step": 502
    },
    {
      "epoch": 0.2237544483985765,
      "grad_norm": 0.37251392006874084,
      "learning_rate": 9.989949748743719e-05,
      "loss": 0.9812,
      "step": 503
    },
    {
      "epoch": 0.22419928825622776,
      "grad_norm": 0.33114826679229736,
      "learning_rate": 9.969849246231156e-05,
      "loss": 0.893,
      "step": 504
    },
    {
      "epoch": 0.22464412811387902,
      "grad_norm": 0.3244706392288208,
      "learning_rate": 9.949748743718594e-05,
      "loss": 0.9659,
      "step": 505
    },
    {
      "epoch": 0.22508896797153025,
      "grad_norm": 0.3634791374206543,
      "learning_rate": 9.929648241206031e-05,
      "loss": 1.0315,
      "step": 506
    },
    {
      "epoch": 0.2255338078291815,
      "grad_norm": 0.38301393389701843,
      "learning_rate": 9.909547738693468e-05,
      "loss": 0.9492,
      "step": 507
    },
    {
      "epoch": 0.22597864768683273,
      "grad_norm": 0.3449389338493347,
      "learning_rate": 9.889447236180906e-05,
      "loss": 0.9327,
      "step": 508
    },
    {
      "epoch": 0.226423487544484,
      "grad_norm": 0.3777461051940918,
      "learning_rate": 9.869346733668342e-05,
      "loss": 0.9938,
      "step": 509
    },
    {
      "epoch": 0.22686832740213522,
      "grad_norm": 0.3581281006336212,
      "learning_rate": 9.84924623115578e-05,
      "loss": 1.0184,
      "step": 510
    },
    {
      "epoch": 0.22731316725978648,
      "grad_norm": 0.3965352475643158,
      "learning_rate": 9.829145728643216e-05,
      "loss": 0.9131,
      "step": 511
    },
    {
      "epoch": 0.2277580071174377,
      "grad_norm": 0.33017244935035706,
      "learning_rate": 9.809045226130655e-05,
      "loss": 0.9547,
      "step": 512
    },
    {
      "epoch": 0.22820284697508897,
      "grad_norm": 0.35746607184410095,
      "learning_rate": 9.788944723618091e-05,
      "loss": 0.9917,
      "step": 513
    },
    {
      "epoch": 0.22864768683274023,
      "grad_norm": 0.36392533779144287,
      "learning_rate": 9.768844221105528e-05,
      "loss": 1.0056,
      "step": 514
    },
    {
      "epoch": 0.22909252669039146,
      "grad_norm": 0.3272344768047333,
      "learning_rate": 9.748743718592965e-05,
      "loss": 0.9895,
      "step": 515
    },
    {
      "epoch": 0.22953736654804271,
      "grad_norm": 0.4714422821998596,
      "learning_rate": 9.728643216080403e-05,
      "loss": 1.0156,
      "step": 516
    },
    {
      "epoch": 0.22998220640569395,
      "grad_norm": 0.3458651006221771,
      "learning_rate": 9.70854271356784e-05,
      "loss": 0.8773,
      "step": 517
    },
    {
      "epoch": 0.2304270462633452,
      "grad_norm": 0.32996249198913574,
      "learning_rate": 9.688442211055276e-05,
      "loss": 0.9028,
      "step": 518
    },
    {
      "epoch": 0.23087188612099643,
      "grad_norm": 0.3559573292732239,
      "learning_rate": 9.668341708542715e-05,
      "loss": 0.8993,
      "step": 519
    },
    {
      "epoch": 0.2313167259786477,
      "grad_norm": 0.36793214082717896,
      "learning_rate": 9.64824120603015e-05,
      "loss": 0.8616,
      "step": 520
    },
    {
      "epoch": 0.23176156583629892,
      "grad_norm": 0.3426240086555481,
      "learning_rate": 9.628140703517589e-05,
      "loss": 0.9483,
      "step": 521
    },
    {
      "epoch": 0.23220640569395018,
      "grad_norm": 0.4089488685131073,
      "learning_rate": 9.608040201005025e-05,
      "loss": 0.9446,
      "step": 522
    },
    {
      "epoch": 0.2326512455516014,
      "grad_norm": 0.40365123748779297,
      "learning_rate": 9.587939698492462e-05,
      "loss": 1.0485,
      "step": 523
    },
    {
      "epoch": 0.23309608540925267,
      "grad_norm": 0.3387534022331238,
      "learning_rate": 9.5678391959799e-05,
      "loss": 0.8881,
      "step": 524
    },
    {
      "epoch": 0.23354092526690393,
      "grad_norm": 0.3568766117095947,
      "learning_rate": 9.547738693467337e-05,
      "loss": 0.9514,
      "step": 525
    },
    {
      "epoch": 0.23398576512455516,
      "grad_norm": 0.42054617404937744,
      "learning_rate": 9.527638190954774e-05,
      "loss": 0.9481,
      "step": 526
    },
    {
      "epoch": 0.23443060498220641,
      "grad_norm": 0.441377192735672,
      "learning_rate": 9.507537688442212e-05,
      "loss": 0.927,
      "step": 527
    },
    {
      "epoch": 0.23487544483985764,
      "grad_norm": 0.3475041389465332,
      "learning_rate": 9.487437185929649e-05,
      "loss": 0.9735,
      "step": 528
    },
    {
      "epoch": 0.2353202846975089,
      "grad_norm": 0.4227800667285919,
      "learning_rate": 9.467336683417086e-05,
      "loss": 1.0058,
      "step": 529
    },
    {
      "epoch": 0.23576512455516013,
      "grad_norm": 0.3622898459434509,
      "learning_rate": 9.447236180904523e-05,
      "loss": 0.8462,
      "step": 530
    },
    {
      "epoch": 0.2362099644128114,
      "grad_norm": 0.4561034142971039,
      "learning_rate": 9.427135678391961e-05,
      "loss": 0.9615,
      "step": 531
    },
    {
      "epoch": 0.23665480427046262,
      "grad_norm": 0.35412004590034485,
      "learning_rate": 9.407035175879397e-05,
      "loss": 1.0062,
      "step": 532
    },
    {
      "epoch": 0.23709964412811388,
      "grad_norm": 0.4144454598426819,
      "learning_rate": 9.386934673366835e-05,
      "loss": 0.8798,
      "step": 533
    },
    {
      "epoch": 0.23754448398576514,
      "grad_norm": 0.40296846628189087,
      "learning_rate": 9.366834170854271e-05,
      "loss": 1.0744,
      "step": 534
    },
    {
      "epoch": 0.23798932384341637,
      "grad_norm": 0.35528820753097534,
      "learning_rate": 9.34673366834171e-05,
      "loss": 1.0083,
      "step": 535
    },
    {
      "epoch": 0.23843416370106763,
      "grad_norm": 0.37445664405822754,
      "learning_rate": 9.326633165829146e-05,
      "loss": 0.9631,
      "step": 536
    },
    {
      "epoch": 0.23887900355871886,
      "grad_norm": 0.360051691532135,
      "learning_rate": 9.306532663316585e-05,
      "loss": 0.936,
      "step": 537
    },
    {
      "epoch": 0.2393238434163701,
      "grad_norm": 0.440403550863266,
      "learning_rate": 9.28643216080402e-05,
      "loss": 0.8711,
      "step": 538
    },
    {
      "epoch": 0.23976868327402134,
      "grad_norm": 0.2862887680530548,
      "learning_rate": 9.266331658291458e-05,
      "loss": 0.9069,
      "step": 539
    },
    {
      "epoch": 0.2402135231316726,
      "grad_norm": 0.3707970976829529,
      "learning_rate": 9.246231155778895e-05,
      "loss": 1.0669,
      "step": 540
    },
    {
      "epoch": 0.24065836298932383,
      "grad_norm": 0.3454754948616028,
      "learning_rate": 9.226130653266331e-05,
      "loss": 1.0074,
      "step": 541
    },
    {
      "epoch": 0.2411032028469751,
      "grad_norm": 0.3313436806201935,
      "learning_rate": 9.20603015075377e-05,
      "loss": 0.9425,
      "step": 542
    },
    {
      "epoch": 0.24154804270462635,
      "grad_norm": 0.36515554785728455,
      "learning_rate": 9.185929648241206e-05,
      "loss": 0.9946,
      "step": 543
    },
    {
      "epoch": 0.24199288256227758,
      "grad_norm": 0.3730347454547882,
      "learning_rate": 9.165829145728644e-05,
      "loss": 0.9838,
      "step": 544
    },
    {
      "epoch": 0.24243772241992884,
      "grad_norm": 0.39922618865966797,
      "learning_rate": 9.14572864321608e-05,
      "loss": 0.815,
      "step": 545
    },
    {
      "epoch": 0.24288256227758007,
      "grad_norm": 0.31671297550201416,
      "learning_rate": 9.125628140703519e-05,
      "loss": 1.0014,
      "step": 546
    },
    {
      "epoch": 0.24332740213523132,
      "grad_norm": 0.39499327540397644,
      "learning_rate": 9.105527638190955e-05,
      "loss": 0.9104,
      "step": 547
    },
    {
      "epoch": 0.24377224199288255,
      "grad_norm": 0.3678281009197235,
      "learning_rate": 9.085427135678392e-05,
      "loss": 0.927,
      "step": 548
    },
    {
      "epoch": 0.2442170818505338,
      "grad_norm": 0.372120201587677,
      "learning_rate": 9.06532663316583e-05,
      "loss": 0.8212,
      "step": 549
    },
    {
      "epoch": 0.24466192170818504,
      "grad_norm": 0.3431313931941986,
      "learning_rate": 9.045226130653267e-05,
      "loss": 0.8883,
      "step": 550
    },
    {
      "epoch": 0.2451067615658363,
      "grad_norm": 0.391932874917984,
      "learning_rate": 9.025125628140704e-05,
      "loss": 0.9425,
      "step": 551
    },
    {
      "epoch": 0.24555160142348753,
      "grad_norm": 0.4534127116203308,
      "learning_rate": 9.005025125628141e-05,
      "loss": 1.2452,
      "step": 552
    },
    {
      "epoch": 0.2459964412811388,
      "grad_norm": 0.37136268615722656,
      "learning_rate": 8.984924623115579e-05,
      "loss": 1.0424,
      "step": 553
    },
    {
      "epoch": 0.24644128113879005,
      "grad_norm": 0.41206422448158264,
      "learning_rate": 8.964824120603016e-05,
      "loss": 1.1418,
      "step": 554
    },
    {
      "epoch": 0.24688612099644128,
      "grad_norm": 0.440790057182312,
      "learning_rate": 8.944723618090453e-05,
      "loss": 0.8978,
      "step": 555
    },
    {
      "epoch": 0.24733096085409254,
      "grad_norm": 0.32154756784439087,
      "learning_rate": 8.92462311557789e-05,
      "loss": 0.974,
      "step": 556
    },
    {
      "epoch": 0.24777580071174377,
      "grad_norm": 0.3203801214694977,
      "learning_rate": 8.904522613065326e-05,
      "loss": 0.9684,
      "step": 557
    },
    {
      "epoch": 0.24822064056939502,
      "grad_norm": 0.3494798243045807,
      "learning_rate": 8.884422110552765e-05,
      "loss": 0.9127,
      "step": 558
    },
    {
      "epoch": 0.24866548042704625,
      "grad_norm": 0.3334081470966339,
      "learning_rate": 8.864321608040201e-05,
      "loss": 0.9685,
      "step": 559
    },
    {
      "epoch": 0.2491103202846975,
      "grad_norm": 0.3990677297115326,
      "learning_rate": 8.84422110552764e-05,
      "loss": 0.9923,
      "step": 560
    },
    {
      "epoch": 0.24955516014234874,
      "grad_norm": 0.39742422103881836,
      "learning_rate": 8.824120603015076e-05,
      "loss": 0.9786,
      "step": 561
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.396680623292923,
      "learning_rate": 8.804020100502513e-05,
      "loss": 0.9636,
      "step": 562
    },
    {
      "epoch": 0.25044483985765126,
      "grad_norm": 0.32014915347099304,
      "learning_rate": 8.78391959798995e-05,
      "loss": 0.9392,
      "step": 563
    },
    {
      "epoch": 0.2508896797153025,
      "grad_norm": 0.31222036480903625,
      "learning_rate": 8.763819095477387e-05,
      "loss": 0.8806,
      "step": 564
    },
    {
      "epoch": 0.2513345195729537,
      "grad_norm": 0.3277677595615387,
      "learning_rate": 8.743718592964825e-05,
      "loss": 0.8699,
      "step": 565
    },
    {
      "epoch": 0.251779359430605,
      "grad_norm": 0.42860186100006104,
      "learning_rate": 8.723618090452261e-05,
      "loss": 0.9997,
      "step": 566
    },
    {
      "epoch": 0.25222419928825623,
      "grad_norm": 0.35114365816116333,
      "learning_rate": 8.7035175879397e-05,
      "loss": 0.947,
      "step": 567
    },
    {
      "epoch": 0.2526690391459075,
      "grad_norm": 0.3189033269882202,
      "learning_rate": 8.683417085427135e-05,
      "loss": 0.904,
      "step": 568
    },
    {
      "epoch": 0.2531138790035587,
      "grad_norm": 0.37736451625823975,
      "learning_rate": 8.663316582914574e-05,
      "loss": 0.9518,
      "step": 569
    },
    {
      "epoch": 0.25355871886120995,
      "grad_norm": 0.357546329498291,
      "learning_rate": 8.64321608040201e-05,
      "loss": 0.9499,
      "step": 570
    },
    {
      "epoch": 0.2540035587188612,
      "grad_norm": 0.37832340598106384,
      "learning_rate": 8.623115577889449e-05,
      "loss": 0.895,
      "step": 571
    },
    {
      "epoch": 0.25444839857651247,
      "grad_norm": 0.3017910122871399,
      "learning_rate": 8.603015075376884e-05,
      "loss": 1.041,
      "step": 572
    },
    {
      "epoch": 0.2548932384341637,
      "grad_norm": 0.3350992798805237,
      "learning_rate": 8.582914572864322e-05,
      "loss": 0.9303,
      "step": 573
    },
    {
      "epoch": 0.25533807829181493,
      "grad_norm": 0.3973081707954407,
      "learning_rate": 8.562814070351759e-05,
      "loss": 0.9663,
      "step": 574
    },
    {
      "epoch": 0.2557829181494662,
      "grad_norm": 0.3381018340587616,
      "learning_rate": 8.542713567839196e-05,
      "loss": 0.9427,
      "step": 575
    },
    {
      "epoch": 0.25622775800711745,
      "grad_norm": 0.3191261887550354,
      "learning_rate": 8.522613065326634e-05,
      "loss": 0.8298,
      "step": 576
    },
    {
      "epoch": 0.2566725978647687,
      "grad_norm": 0.4260123372077942,
      "learning_rate": 8.502512562814071e-05,
      "loss": 1.0098,
      "step": 577
    },
    {
      "epoch": 0.2571174377224199,
      "grad_norm": 0.34333735704421997,
      "learning_rate": 8.482412060301508e-05,
      "loss": 1.0,
      "step": 578
    },
    {
      "epoch": 0.25756227758007116,
      "grad_norm": 0.4197953939437866,
      "learning_rate": 8.462311557788946e-05,
      "loss": 1.0629,
      "step": 579
    },
    {
      "epoch": 0.2580071174377224,
      "grad_norm": 0.37437987327575684,
      "learning_rate": 8.442211055276383e-05,
      "loss": 0.9298,
      "step": 580
    },
    {
      "epoch": 0.2584519572953737,
      "grad_norm": 0.40584203600883484,
      "learning_rate": 8.42211055276382e-05,
      "loss": 0.9493,
      "step": 581
    },
    {
      "epoch": 0.25889679715302494,
      "grad_norm": 0.2971116900444031,
      "learning_rate": 8.402010050251256e-05,
      "loss": 0.9228,
      "step": 582
    },
    {
      "epoch": 0.25934163701067614,
      "grad_norm": 0.33302900195121765,
      "learning_rate": 8.381909547738695e-05,
      "loss": 0.9277,
      "step": 583
    },
    {
      "epoch": 0.2597864768683274,
      "grad_norm": 0.4172103703022003,
      "learning_rate": 8.36180904522613e-05,
      "loss": 0.9604,
      "step": 584
    },
    {
      "epoch": 0.26023131672597866,
      "grad_norm": 0.36361220479011536,
      "learning_rate": 8.341708542713568e-05,
      "loss": 0.927,
      "step": 585
    },
    {
      "epoch": 0.2606761565836299,
      "grad_norm": 0.365182489156723,
      "learning_rate": 8.321608040201005e-05,
      "loss": 0.9246,
      "step": 586
    },
    {
      "epoch": 0.2611209964412811,
      "grad_norm": 0.32461798191070557,
      "learning_rate": 8.301507537688443e-05,
      "loss": 0.9727,
      "step": 587
    },
    {
      "epoch": 0.2615658362989324,
      "grad_norm": 0.3159072995185852,
      "learning_rate": 8.28140703517588e-05,
      "loss": 0.9286,
      "step": 588
    },
    {
      "epoch": 0.26201067615658363,
      "grad_norm": 0.32971808314323425,
      "learning_rate": 8.261306532663317e-05,
      "loss": 0.9639,
      "step": 589
    },
    {
      "epoch": 0.2624555160142349,
      "grad_norm": 0.5026357173919678,
      "learning_rate": 8.241206030150754e-05,
      "loss": 0.9706,
      "step": 590
    },
    {
      "epoch": 0.2629003558718861,
      "grad_norm": 0.37912416458129883,
      "learning_rate": 8.22110552763819e-05,
      "loss": 0.8684,
      "step": 591
    },
    {
      "epoch": 0.26334519572953735,
      "grad_norm": 0.3844146430492401,
      "learning_rate": 8.201005025125629e-05,
      "loss": 0.9659,
      "step": 592
    },
    {
      "epoch": 0.2637900355871886,
      "grad_norm": 0.3652094006538391,
      "learning_rate": 8.180904522613065e-05,
      "loss": 0.8978,
      "step": 593
    },
    {
      "epoch": 0.26423487544483987,
      "grad_norm": 0.3851691484451294,
      "learning_rate": 8.160804020100504e-05,
      "loss": 1.0336,
      "step": 594
    },
    {
      "epoch": 0.2646797153024911,
      "grad_norm": 0.3992913067340851,
      "learning_rate": 8.14070351758794e-05,
      "loss": 0.93,
      "step": 595
    },
    {
      "epoch": 0.26512455516014233,
      "grad_norm": 0.39918407797813416,
      "learning_rate": 8.120603015075378e-05,
      "loss": 1.0553,
      "step": 596
    },
    {
      "epoch": 0.2655693950177936,
      "grad_norm": 0.369907021522522,
      "learning_rate": 8.100502512562814e-05,
      "loss": 1.0672,
      "step": 597
    },
    {
      "epoch": 0.26601423487544484,
      "grad_norm": 0.3794615566730499,
      "learning_rate": 8.080402010050251e-05,
      "loss": 0.9986,
      "step": 598
    },
    {
      "epoch": 0.2664590747330961,
      "grad_norm": 0.4018343389034271,
      "learning_rate": 8.060301507537689e-05,
      "loss": 0.9832,
      "step": 599
    },
    {
      "epoch": 0.2669039145907473,
      "grad_norm": 0.3550480902194977,
      "learning_rate": 8.040201005025126e-05,
      "loss": 0.8965,
      "step": 600
    },
    {
      "epoch": 0.26734875444839856,
      "grad_norm": 0.3323322832584381,
      "learning_rate": 8.020100502512563e-05,
      "loss": 0.9779,
      "step": 601
    },
    {
      "epoch": 0.2677935943060498,
      "grad_norm": 0.3290577828884125,
      "learning_rate": 8e-05,
      "loss": 1.0571,
      "step": 602
    },
    {
      "epoch": 0.2682384341637011,
      "grad_norm": 0.36351272463798523,
      "learning_rate": 7.979899497487438e-05,
      "loss": 0.9393,
      "step": 603
    },
    {
      "epoch": 0.26868327402135234,
      "grad_norm": 0.3494581878185272,
      "learning_rate": 7.959798994974875e-05,
      "loss": 0.9157,
      "step": 604
    },
    {
      "epoch": 0.26912811387900354,
      "grad_norm": 0.36106282472610474,
      "learning_rate": 7.939698492462313e-05,
      "loss": 0.9772,
      "step": 605
    },
    {
      "epoch": 0.2695729537366548,
      "grad_norm": 0.35360780358314514,
      "learning_rate": 7.91959798994975e-05,
      "loss": 1.0008,
      "step": 606
    },
    {
      "epoch": 0.27001779359430605,
      "grad_norm": 0.3501545786857605,
      "learning_rate": 7.899497487437186e-05,
      "loss": 0.9061,
      "step": 607
    },
    {
      "epoch": 0.2704626334519573,
      "grad_norm": 0.37978798151016235,
      "learning_rate": 7.879396984924623e-05,
      "loss": 1.0096,
      "step": 608
    },
    {
      "epoch": 0.2709074733096085,
      "grad_norm": 0.3628638684749603,
      "learning_rate": 7.85929648241206e-05,
      "loss": 0.9912,
      "step": 609
    },
    {
      "epoch": 0.2713523131672598,
      "grad_norm": 0.3882206976413727,
      "learning_rate": 7.839195979899498e-05,
      "loss": 0.8698,
      "step": 610
    },
    {
      "epoch": 0.27179715302491103,
      "grad_norm": 0.44139206409454346,
      "learning_rate": 7.819095477386935e-05,
      "loss": 0.9444,
      "step": 611
    },
    {
      "epoch": 0.2722419928825623,
      "grad_norm": 0.2937663793563843,
      "learning_rate": 7.798994974874372e-05,
      "loss": 0.8367,
      "step": 612
    },
    {
      "epoch": 0.27268683274021355,
      "grad_norm": 0.34553638100624084,
      "learning_rate": 7.77889447236181e-05,
      "loss": 1.0092,
      "step": 613
    },
    {
      "epoch": 0.27313167259786475,
      "grad_norm": 0.4855239689350128,
      "learning_rate": 7.758793969849247e-05,
      "loss": 0.8105,
      "step": 614
    },
    {
      "epoch": 0.273576512455516,
      "grad_norm": 0.359298974275589,
      "learning_rate": 7.738693467336684e-05,
      "loss": 0.9225,
      "step": 615
    },
    {
      "epoch": 0.27402135231316727,
      "grad_norm": 0.3697519600391388,
      "learning_rate": 7.71859296482412e-05,
      "loss": 1.0232,
      "step": 616
    },
    {
      "epoch": 0.2744661921708185,
      "grad_norm": 0.3558047115802765,
      "learning_rate": 7.698492462311559e-05,
      "loss": 1.0508,
      "step": 617
    },
    {
      "epoch": 0.2749110320284697,
      "grad_norm": 0.375429630279541,
      "learning_rate": 7.678391959798995e-05,
      "loss": 0.9915,
      "step": 618
    },
    {
      "epoch": 0.275355871886121,
      "grad_norm": 0.32131966948509216,
      "learning_rate": 7.658291457286433e-05,
      "loss": 0.9213,
      "step": 619
    },
    {
      "epoch": 0.27580071174377224,
      "grad_norm": 0.4405272305011749,
      "learning_rate": 7.638190954773869e-05,
      "loss": 0.9044,
      "step": 620
    },
    {
      "epoch": 0.2762455516014235,
      "grad_norm": 0.38022780418395996,
      "learning_rate": 7.618090452261307e-05,
      "loss": 0.9114,
      "step": 621
    },
    {
      "epoch": 0.27669039145907476,
      "grad_norm": 0.375396728515625,
      "learning_rate": 7.597989949748744e-05,
      "loss": 0.9496,
      "step": 622
    },
    {
      "epoch": 0.27713523131672596,
      "grad_norm": 0.38898783922195435,
      "learning_rate": 7.577889447236181e-05,
      "loss": 0.9488,
      "step": 623
    },
    {
      "epoch": 0.2775800711743772,
      "grad_norm": 0.32414594292640686,
      "learning_rate": 7.557788944723618e-05,
      "loss": 0.9054,
      "step": 624
    },
    {
      "epoch": 0.2780249110320285,
      "grad_norm": 0.34590399265289307,
      "learning_rate": 7.537688442211056e-05,
      "loss": 0.9491,
      "step": 625
    },
    {
      "epoch": 0.27846975088967973,
      "grad_norm": 0.4231082499027252,
      "learning_rate": 7.517587939698493e-05,
      "loss": 1.0757,
      "step": 626
    },
    {
      "epoch": 0.27891459074733094,
      "grad_norm": 0.3876168131828308,
      "learning_rate": 7.49748743718593e-05,
      "loss": 0.9705,
      "step": 627
    },
    {
      "epoch": 0.2793594306049822,
      "grad_norm": 0.3473493158817291,
      "learning_rate": 7.477386934673368e-05,
      "loss": 0.9262,
      "step": 628
    },
    {
      "epoch": 0.27980427046263345,
      "grad_norm": 0.34793248772621155,
      "learning_rate": 7.457286432160805e-05,
      "loss": 0.9728,
      "step": 629
    },
    {
      "epoch": 0.2802491103202847,
      "grad_norm": 0.3288039267063141,
      "learning_rate": 7.437185929648241e-05,
      "loss": 0.9609,
      "step": 630
    },
    {
      "epoch": 0.28069395017793597,
      "grad_norm": 0.37984830141067505,
      "learning_rate": 7.417085427135678e-05,
      "loss": 1.0235,
      "step": 631
    },
    {
      "epoch": 0.28113879003558717,
      "grad_norm": 0.382462739944458,
      "learning_rate": 7.396984924623115e-05,
      "loss": 0.9898,
      "step": 632
    },
    {
      "epoch": 0.28158362989323843,
      "grad_norm": 0.406568318605423,
      "learning_rate": 7.376884422110553e-05,
      "loss": 0.9442,
      "step": 633
    },
    {
      "epoch": 0.2820284697508897,
      "grad_norm": 0.33671334385871887,
      "learning_rate": 7.35678391959799e-05,
      "loss": 0.9771,
      "step": 634
    },
    {
      "epoch": 0.28247330960854095,
      "grad_norm": 0.34497055411338806,
      "learning_rate": 7.336683417085427e-05,
      "loss": 0.9452,
      "step": 635
    },
    {
      "epoch": 0.28291814946619215,
      "grad_norm": 0.37973660230636597,
      "learning_rate": 7.316582914572865e-05,
      "loss": 1.0431,
      "step": 636
    },
    {
      "epoch": 0.2833629893238434,
      "grad_norm": 0.391178160905838,
      "learning_rate": 7.296482412060302e-05,
      "loss": 0.9732,
      "step": 637
    },
    {
      "epoch": 0.28380782918149466,
      "grad_norm": 0.41452303528785706,
      "learning_rate": 7.276381909547739e-05,
      "loss": 0.986,
      "step": 638
    },
    {
      "epoch": 0.2842526690391459,
      "grad_norm": 0.388171911239624,
      "learning_rate": 7.256281407035177e-05,
      "loss": 0.9368,
      "step": 639
    },
    {
      "epoch": 0.2846975088967972,
      "grad_norm": 0.40021994709968567,
      "learning_rate": 7.236180904522614e-05,
      "loss": 0.9213,
      "step": 640
    },
    {
      "epoch": 0.2851423487544484,
      "grad_norm": 0.380087286233902,
      "learning_rate": 7.21608040201005e-05,
      "loss": 0.9452,
      "step": 641
    },
    {
      "epoch": 0.28558718861209964,
      "grad_norm": 0.3855552673339844,
      "learning_rate": 7.195979899497488e-05,
      "loss": 0.8841,
      "step": 642
    },
    {
      "epoch": 0.2860320284697509,
      "grad_norm": 0.4310576319694519,
      "learning_rate": 7.175879396984924e-05,
      "loss": 0.9666,
      "step": 643
    },
    {
      "epoch": 0.28647686832740216,
      "grad_norm": 0.3338180482387543,
      "learning_rate": 7.155778894472363e-05,
      "loss": 0.9127,
      "step": 644
    },
    {
      "epoch": 0.28692170818505336,
      "grad_norm": 0.373388409614563,
      "learning_rate": 7.135678391959799e-05,
      "loss": 0.9409,
      "step": 645
    },
    {
      "epoch": 0.2873665480427046,
      "grad_norm": 0.37845322489738464,
      "learning_rate": 7.115577889447236e-05,
      "loss": 0.9885,
      "step": 646
    },
    {
      "epoch": 0.2878113879003559,
      "grad_norm": 0.39277783036231995,
      "learning_rate": 7.095477386934674e-05,
      "loss": 0.9819,
      "step": 647
    },
    {
      "epoch": 0.28825622775800713,
      "grad_norm": 0.3995840549468994,
      "learning_rate": 7.075376884422111e-05,
      "loss": 1.018,
      "step": 648
    },
    {
      "epoch": 0.28870106761565834,
      "grad_norm": 0.48846814036369324,
      "learning_rate": 7.055276381909548e-05,
      "loss": 1.0557,
      "step": 649
    },
    {
      "epoch": 0.2891459074733096,
      "grad_norm": 0.44867080450057983,
      "learning_rate": 7.035175879396985e-05,
      "loss": 0.8785,
      "step": 650
    },
    {
      "epoch": 0.28959074733096085,
      "grad_norm": 0.3561117649078369,
      "learning_rate": 7.015075376884423e-05,
      "loss": 0.8914,
      "step": 651
    },
    {
      "epoch": 0.2900355871886121,
      "grad_norm": 0.4034915268421173,
      "learning_rate": 6.99497487437186e-05,
      "loss": 0.9038,
      "step": 652
    },
    {
      "epoch": 0.29048042704626337,
      "grad_norm": 0.33660322427749634,
      "learning_rate": 6.974874371859297e-05,
      "loss": 0.9757,
      "step": 653
    },
    {
      "epoch": 0.29092526690391457,
      "grad_norm": 0.3358772099018097,
      "learning_rate": 6.954773869346733e-05,
      "loss": 0.9003,
      "step": 654
    },
    {
      "epoch": 0.29137010676156583,
      "grad_norm": 0.40661585330963135,
      "learning_rate": 6.93467336683417e-05,
      "loss": 0.8901,
      "step": 655
    },
    {
      "epoch": 0.2918149466192171,
      "grad_norm": 0.3901599943637848,
      "learning_rate": 6.914572864321608e-05,
      "loss": 0.8622,
      "step": 656
    },
    {
      "epoch": 0.29225978647686834,
      "grad_norm": 0.4064997732639313,
      "learning_rate": 6.894472361809045e-05,
      "loss": 0.8799,
      "step": 657
    },
    {
      "epoch": 0.29270462633451955,
      "grad_norm": 0.3740297555923462,
      "learning_rate": 6.874371859296482e-05,
      "loss": 0.9026,
      "step": 658
    },
    {
      "epoch": 0.2931494661921708,
      "grad_norm": 0.3562834858894348,
      "learning_rate": 6.85427135678392e-05,
      "loss": 0.949,
      "step": 659
    },
    {
      "epoch": 0.29359430604982206,
      "grad_norm": 0.41133421659469604,
      "learning_rate": 6.834170854271357e-05,
      "loss": 1.0558,
      "step": 660
    },
    {
      "epoch": 0.2940391459074733,
      "grad_norm": 0.4356454014778137,
      "learning_rate": 6.814070351758794e-05,
      "loss": 0.9691,
      "step": 661
    },
    {
      "epoch": 0.2944839857651246,
      "grad_norm": 0.3359556496143341,
      "learning_rate": 6.793969849246232e-05,
      "loss": 0.9266,
      "step": 662
    },
    {
      "epoch": 0.2949288256227758,
      "grad_norm": 0.34760648012161255,
      "learning_rate": 6.773869346733669e-05,
      "loss": 0.8974,
      "step": 663
    },
    {
      "epoch": 0.29537366548042704,
      "grad_norm": 0.42510053515434265,
      "learning_rate": 6.753768844221105e-05,
      "loss": 0.9537,
      "step": 664
    },
    {
      "epoch": 0.2958185053380783,
      "grad_norm": 0.35934481024742126,
      "learning_rate": 6.733668341708544e-05,
      "loss": 0.9455,
      "step": 665
    },
    {
      "epoch": 0.29626334519572955,
      "grad_norm": 0.3559943735599518,
      "learning_rate": 6.71356783919598e-05,
      "loss": 0.9363,
      "step": 666
    },
    {
      "epoch": 0.29670818505338076,
      "grad_norm": 0.36470353603363037,
      "learning_rate": 6.693467336683418e-05,
      "loss": 0.978,
      "step": 667
    },
    {
      "epoch": 0.297153024911032,
      "grad_norm": 0.4014419913291931,
      "learning_rate": 6.673366834170854e-05,
      "loss": 1.0503,
      "step": 668
    },
    {
      "epoch": 0.2975978647686833,
      "grad_norm": 0.4242320656776428,
      "learning_rate": 6.653266331658293e-05,
      "loss": 0.8848,
      "step": 669
    },
    {
      "epoch": 0.29804270462633453,
      "grad_norm": 0.3722448945045471,
      "learning_rate": 6.633165829145729e-05,
      "loss": 0.9487,
      "step": 670
    },
    {
      "epoch": 0.2984875444839858,
      "grad_norm": 0.33282628655433655,
      "learning_rate": 6.613065326633166e-05,
      "loss": 0.9483,
      "step": 671
    },
    {
      "epoch": 0.298932384341637,
      "grad_norm": 0.45830950140953064,
      "learning_rate": 6.592964824120603e-05,
      "loss": 1.0413,
      "step": 672
    },
    {
      "epoch": 0.29937722419928825,
      "grad_norm": 0.32155314087867737,
      "learning_rate": 6.57286432160804e-05,
      "loss": 0.9299,
      "step": 673
    },
    {
      "epoch": 0.2998220640569395,
      "grad_norm": 0.319416344165802,
      "learning_rate": 6.552763819095478e-05,
      "loss": 0.9,
      "step": 674
    },
    {
      "epoch": 0.30026690391459077,
      "grad_norm": 0.4341578781604767,
      "learning_rate": 6.532663316582915e-05,
      "loss": 0.9455,
      "step": 675
    },
    {
      "epoch": 0.30071174377224197,
      "grad_norm": 0.4117160737514496,
      "learning_rate": 6.512562814070352e-05,
      "loss": 0.9682,
      "step": 676
    },
    {
      "epoch": 0.3011565836298932,
      "grad_norm": 0.42781034111976624,
      "learning_rate": 6.492462311557788e-05,
      "loss": 1.0128,
      "step": 677
    },
    {
      "epoch": 0.3016014234875445,
      "grad_norm": 0.3355860710144043,
      "learning_rate": 6.472361809045227e-05,
      "loss": 0.911,
      "step": 678
    },
    {
      "epoch": 0.30204626334519574,
      "grad_norm": 0.3754599690437317,
      "learning_rate": 6.452261306532663e-05,
      "loss": 1.0985,
      "step": 679
    },
    {
      "epoch": 0.302491103202847,
      "grad_norm": 0.3943292200565338,
      "learning_rate": 6.4321608040201e-05,
      "loss": 0.9051,
      "step": 680
    },
    {
      "epoch": 0.3029359430604982,
      "grad_norm": 0.3109300434589386,
      "learning_rate": 6.412060301507538e-05,
      "loss": 0.8964,
      "step": 681
    },
    {
      "epoch": 0.30338078291814946,
      "grad_norm": 0.32145023345947266,
      "learning_rate": 6.391959798994975e-05,
      "loss": 0.9225,
      "step": 682
    },
    {
      "epoch": 0.3038256227758007,
      "grad_norm": 0.37335747480392456,
      "learning_rate": 6.371859296482412e-05,
      "loss": 0.9586,
      "step": 683
    },
    {
      "epoch": 0.304270462633452,
      "grad_norm": 0.3109886646270752,
      "learning_rate": 6.35175879396985e-05,
      "loss": 0.9308,
      "step": 684
    },
    {
      "epoch": 0.3047153024911032,
      "grad_norm": 0.3259667456150055,
      "learning_rate": 6.331658291457287e-05,
      "loss": 0.9322,
      "step": 685
    },
    {
      "epoch": 0.30516014234875444,
      "grad_norm": 0.35218545794487,
      "learning_rate": 6.311557788944724e-05,
      "loss": 0.9853,
      "step": 686
    },
    {
      "epoch": 0.3056049822064057,
      "grad_norm": 0.3094191551208496,
      "learning_rate": 6.291457286432161e-05,
      "loss": 0.8857,
      "step": 687
    },
    {
      "epoch": 0.30604982206405695,
      "grad_norm": 0.3218885064125061,
      "learning_rate": 6.271356783919599e-05,
      "loss": 0.9363,
      "step": 688
    },
    {
      "epoch": 0.3064946619217082,
      "grad_norm": 0.4279980957508087,
      "learning_rate": 6.251256281407035e-05,
      "loss": 0.9375,
      "step": 689
    },
    {
      "epoch": 0.3069395017793594,
      "grad_norm": 0.3290557861328125,
      "learning_rate": 6.231155778894473e-05,
      "loss": 0.8822,
      "step": 690
    },
    {
      "epoch": 0.30738434163701067,
      "grad_norm": 0.3323034942150116,
      "learning_rate": 6.211055276381909e-05,
      "loss": 1.016,
      "step": 691
    },
    {
      "epoch": 0.30782918149466193,
      "grad_norm": 0.31911730766296387,
      "learning_rate": 6.190954773869348e-05,
      "loss": 0.8814,
      "step": 692
    },
    {
      "epoch": 0.3082740213523132,
      "grad_norm": 0.3330342471599579,
      "learning_rate": 6.170854271356784e-05,
      "loss": 0.9745,
      "step": 693
    },
    {
      "epoch": 0.3087188612099644,
      "grad_norm": 0.42443540692329407,
      "learning_rate": 6.150753768844222e-05,
      "loss": 0.8555,
      "step": 694
    },
    {
      "epoch": 0.30916370106761565,
      "grad_norm": 0.3406936526298523,
      "learning_rate": 6.130653266331658e-05,
      "loss": 0.9958,
      "step": 695
    },
    {
      "epoch": 0.3096085409252669,
      "grad_norm": 0.4078681766986847,
      "learning_rate": 6.110552763819096e-05,
      "loss": 0.8692,
      "step": 696
    },
    {
      "epoch": 0.31005338078291816,
      "grad_norm": 0.4206138849258423,
      "learning_rate": 6.090452261306533e-05,
      "loss": 0.9255,
      "step": 697
    },
    {
      "epoch": 0.3104982206405694,
      "grad_norm": 0.37396302819252014,
      "learning_rate": 6.070351758793971e-05,
      "loss": 0.9942,
      "step": 698
    },
    {
      "epoch": 0.3109430604982206,
      "grad_norm": 0.3433186411857605,
      "learning_rate": 6.0502512562814076e-05,
      "loss": 0.8593,
      "step": 699
    },
    {
      "epoch": 0.3113879003558719,
      "grad_norm": 0.37683218717575073,
      "learning_rate": 6.030150753768844e-05,
      "loss": 0.8921,
      "step": 700
    },
    {
      "epoch": 0.31183274021352314,
      "grad_norm": 0.3779331147670746,
      "learning_rate": 6.0100502512562815e-05,
      "loss": 0.9528,
      "step": 701
    },
    {
      "epoch": 0.3122775800711744,
      "grad_norm": 0.296854168176651,
      "learning_rate": 5.989949748743718e-05,
      "loss": 0.8811,
      "step": 702
    },
    {
      "epoch": 0.3127224199288256,
      "grad_norm": 0.4115276336669922,
      "learning_rate": 5.969849246231156e-05,
      "loss": 0.9749,
      "step": 703
    },
    {
      "epoch": 0.31316725978647686,
      "grad_norm": 0.36785462498664856,
      "learning_rate": 5.949748743718593e-05,
      "loss": 0.9813,
      "step": 704
    },
    {
      "epoch": 0.3136120996441281,
      "grad_norm": 0.4077514410018921,
      "learning_rate": 5.929648241206031e-05,
      "loss": 0.9611,
      "step": 705
    },
    {
      "epoch": 0.3140569395017794,
      "grad_norm": 0.34254419803619385,
      "learning_rate": 5.909547738693467e-05,
      "loss": 0.9411,
      "step": 706
    },
    {
      "epoch": 0.3145017793594306,
      "grad_norm": 0.3231055736541748,
      "learning_rate": 5.889447236180905e-05,
      "loss": 0.8937,
      "step": 707
    },
    {
      "epoch": 0.31494661921708184,
      "grad_norm": 0.46129921078681946,
      "learning_rate": 5.869346733668342e-05,
      "loss": 1.0143,
      "step": 708
    },
    {
      "epoch": 0.3153914590747331,
      "grad_norm": 0.31858929991722107,
      "learning_rate": 5.849246231155779e-05,
      "loss": 0.8875,
      "step": 709
    },
    {
      "epoch": 0.31583629893238435,
      "grad_norm": 0.34170815348625183,
      "learning_rate": 5.829145728643216e-05,
      "loss": 0.9015,
      "step": 710
    },
    {
      "epoch": 0.3162811387900356,
      "grad_norm": 0.3193584084510803,
      "learning_rate": 5.809045226130654e-05,
      "loss": 0.9041,
      "step": 711
    },
    {
      "epoch": 0.3167259786476868,
      "grad_norm": 0.3801109790802002,
      "learning_rate": 5.7889447236180904e-05,
      "loss": 0.8286,
      "step": 712
    },
    {
      "epoch": 0.31717081850533807,
      "grad_norm": 0.3580700159072876,
      "learning_rate": 5.7688442211055284e-05,
      "loss": 0.8942,
      "step": 713
    },
    {
      "epoch": 0.31761565836298933,
      "grad_norm": 0.4236672818660736,
      "learning_rate": 5.748743718592965e-05,
      "loss": 0.9645,
      "step": 714
    },
    {
      "epoch": 0.3180604982206406,
      "grad_norm": 0.43990832567214966,
      "learning_rate": 5.728643216080403e-05,
      "loss": 1.0032,
      "step": 715
    },
    {
      "epoch": 0.3185053380782918,
      "grad_norm": 0.34689345955848694,
      "learning_rate": 5.7085427135678396e-05,
      "loss": 0.8941,
      "step": 716
    },
    {
      "epoch": 0.31895017793594305,
      "grad_norm": 0.31815850734710693,
      "learning_rate": 5.688442211055277e-05,
      "loss": 0.8067,
      "step": 717
    },
    {
      "epoch": 0.3193950177935943,
      "grad_norm": 0.36961331963539124,
      "learning_rate": 5.6683417085427135e-05,
      "loss": 0.9644,
      "step": 718
    },
    {
      "epoch": 0.31983985765124556,
      "grad_norm": 0.35958558320999146,
      "learning_rate": 5.6482412060301515e-05,
      "loss": 0.89,
      "step": 719
    },
    {
      "epoch": 0.3202846975088968,
      "grad_norm": 0.3142717480659485,
      "learning_rate": 5.628140703517588e-05,
      "loss": 0.9572,
      "step": 720
    },
    {
      "epoch": 0.320729537366548,
      "grad_norm": 0.3404165506362915,
      "learning_rate": 5.608040201005026e-05,
      "loss": 0.9028,
      "step": 721
    },
    {
      "epoch": 0.3211743772241993,
      "grad_norm": 0.33231121301651,
      "learning_rate": 5.587939698492463e-05,
      "loss": 0.8523,
      "step": 722
    },
    {
      "epoch": 0.32161921708185054,
      "grad_norm": 0.3641085624694824,
      "learning_rate": 5.567839195979899e-05,
      "loss": 0.9232,
      "step": 723
    },
    {
      "epoch": 0.3220640569395018,
      "grad_norm": 0.4227713644504547,
      "learning_rate": 5.547738693467337e-05,
      "loss": 0.872,
      "step": 724
    },
    {
      "epoch": 0.322508896797153,
      "grad_norm": 0.3884482979774475,
      "learning_rate": 5.527638190954774e-05,
      "loss": 0.9237,
      "step": 725
    },
    {
      "epoch": 0.32295373665480426,
      "grad_norm": 0.4094702899456024,
      "learning_rate": 5.507537688442211e-05,
      "loss": 0.9966,
      "step": 726
    },
    {
      "epoch": 0.3233985765124555,
      "grad_norm": 0.39778733253479004,
      "learning_rate": 5.487437185929648e-05,
      "loss": 0.8983,
      "step": 727
    },
    {
      "epoch": 0.3238434163701068,
      "grad_norm": 0.3704332709312439,
      "learning_rate": 5.467336683417086e-05,
      "loss": 0.9645,
      "step": 728
    },
    {
      "epoch": 0.32428825622775803,
      "grad_norm": 0.39981192350387573,
      "learning_rate": 5.4472361809045224e-05,
      "loss": 0.9289,
      "step": 729
    },
    {
      "epoch": 0.32473309608540923,
      "grad_norm": 0.44905000925064087,
      "learning_rate": 5.4271356783919604e-05,
      "loss": 1.0632,
      "step": 730
    },
    {
      "epoch": 0.3251779359430605,
      "grad_norm": 0.4241604208946228,
      "learning_rate": 5.407035175879397e-05,
      "loss": 0.9888,
      "step": 731
    },
    {
      "epoch": 0.32562277580071175,
      "grad_norm": 0.30892229080200195,
      "learning_rate": 5.386934673366835e-05,
      "loss": 0.8545,
      "step": 732
    },
    {
      "epoch": 0.326067615658363,
      "grad_norm": 0.36549416184425354,
      "learning_rate": 5.3668341708542716e-05,
      "loss": 0.9532,
      "step": 733
    },
    {
      "epoch": 0.3265124555160142,
      "grad_norm": 0.36795175075531006,
      "learning_rate": 5.346733668341709e-05,
      "loss": 0.9992,
      "step": 734
    },
    {
      "epoch": 0.32695729537366547,
      "grad_norm": 0.3399661183357239,
      "learning_rate": 5.3266331658291455e-05,
      "loss": 0.9349,
      "step": 735
    },
    {
      "epoch": 0.3274021352313167,
      "grad_norm": 0.5193089842796326,
      "learning_rate": 5.3065326633165835e-05,
      "loss": 0.9922,
      "step": 736
    },
    {
      "epoch": 0.327846975088968,
      "grad_norm": 0.3065936863422394,
      "learning_rate": 5.28643216080402e-05,
      "loss": 0.9197,
      "step": 737
    },
    {
      "epoch": 0.32829181494661924,
      "grad_norm": 0.37638553977012634,
      "learning_rate": 5.266331658291458e-05,
      "loss": 0.8854,
      "step": 738
    },
    {
      "epoch": 0.32873665480427045,
      "grad_norm": 0.36696720123291016,
      "learning_rate": 5.246231155778895e-05,
      "loss": 0.8642,
      "step": 739
    },
    {
      "epoch": 0.3291814946619217,
      "grad_norm": 0.33234134316444397,
      "learning_rate": 5.226130653266332e-05,
      "loss": 0.9507,
      "step": 740
    },
    {
      "epoch": 0.32962633451957296,
      "grad_norm": 0.3777725398540497,
      "learning_rate": 5.206030150753769e-05,
      "loss": 0.9751,
      "step": 741
    },
    {
      "epoch": 0.3300711743772242,
      "grad_norm": 0.3410782516002655,
      "learning_rate": 5.1859296482412066e-05,
      "loss": 0.8185,
      "step": 742
    },
    {
      "epoch": 0.3305160142348754,
      "grad_norm": 0.5856947302818298,
      "learning_rate": 5.165829145728643e-05,
      "loss": 1.0592,
      "step": 743
    },
    {
      "epoch": 0.3309608540925267,
      "grad_norm": 0.35289299488067627,
      "learning_rate": 5.145728643216081e-05,
      "loss": 1.1304,
      "step": 744
    },
    {
      "epoch": 0.33140569395017794,
      "grad_norm": 0.4018556475639343,
      "learning_rate": 5.125628140703518e-05,
      "loss": 1.0516,
      "step": 745
    },
    {
      "epoch": 0.3318505338078292,
      "grad_norm": 0.402352899312973,
      "learning_rate": 5.1055276381909544e-05,
      "loss": 1.0456,
      "step": 746
    },
    {
      "epoch": 0.33229537366548045,
      "grad_norm": 0.36698055267333984,
      "learning_rate": 5.0854271356783924e-05,
      "loss": 1.0091,
      "step": 747
    },
    {
      "epoch": 0.33274021352313166,
      "grad_norm": 0.34098100662231445,
      "learning_rate": 5.065326633165829e-05,
      "loss": 0.8725,
      "step": 748
    },
    {
      "epoch": 0.3331850533807829,
      "grad_norm": 0.3870490789413452,
      "learning_rate": 5.045226130653266e-05,
      "loss": 0.9775,
      "step": 749
    },
    {
      "epoch": 0.33362989323843417,
      "grad_norm": 0.3566199839115143,
      "learning_rate": 5.0251256281407036e-05,
      "loss": 0.842,
      "step": 750
    },
    {
      "epoch": 0.33407473309608543,
      "grad_norm": 0.4538438618183136,
      "learning_rate": 5.005025125628141e-05,
      "loss": 0.9158,
      "step": 751
    },
    {
      "epoch": 0.33451957295373663,
      "grad_norm": 0.3462452292442322,
      "learning_rate": 4.984924623115578e-05,
      "loss": 0.8715,
      "step": 752
    },
    {
      "epoch": 0.3349644128113879,
      "grad_norm": 0.40105798840522766,
      "learning_rate": 4.9648241206030155e-05,
      "loss": 1.0943,
      "step": 753
    },
    {
      "epoch": 0.33540925266903915,
      "grad_norm": 0.38309648633003235,
      "learning_rate": 4.944723618090453e-05,
      "loss": 0.8961,
      "step": 754
    },
    {
      "epoch": 0.3358540925266904,
      "grad_norm": 0.31808745861053467,
      "learning_rate": 4.92462311557789e-05,
      "loss": 0.8785,
      "step": 755
    },
    {
      "epoch": 0.33629893238434166,
      "grad_norm": 0.3787521421909332,
      "learning_rate": 4.9045226130653274e-05,
      "loss": 0.8932,
      "step": 756
    },
    {
      "epoch": 0.33674377224199287,
      "grad_norm": 0.36229708790779114,
      "learning_rate": 4.884422110552764e-05,
      "loss": 1.01,
      "step": 757
    },
    {
      "epoch": 0.3371886120996441,
      "grad_norm": 0.3797127604484558,
      "learning_rate": 4.864321608040201e-05,
      "loss": 0.8492,
      "step": 758
    },
    {
      "epoch": 0.3376334519572954,
      "grad_norm": 0.3839523196220398,
      "learning_rate": 4.844221105527638e-05,
      "loss": 0.9927,
      "step": 759
    },
    {
      "epoch": 0.33807829181494664,
      "grad_norm": 0.3037039637565613,
      "learning_rate": 4.824120603015075e-05,
      "loss": 0.9326,
      "step": 760
    },
    {
      "epoch": 0.33852313167259784,
      "grad_norm": 0.35209789872169495,
      "learning_rate": 4.8040201005025125e-05,
      "loss": 0.8634,
      "step": 761
    },
    {
      "epoch": 0.3389679715302491,
      "grad_norm": 0.338007390499115,
      "learning_rate": 4.78391959798995e-05,
      "loss": 1.0579,
      "step": 762
    },
    {
      "epoch": 0.33941281138790036,
      "grad_norm": 0.43554559350013733,
      "learning_rate": 4.763819095477387e-05,
      "loss": 1.0572,
      "step": 763
    },
    {
      "epoch": 0.3398576512455516,
      "grad_norm": 0.47670868039131165,
      "learning_rate": 4.7437185929648244e-05,
      "loss": 1.1046,
      "step": 764
    },
    {
      "epoch": 0.3403024911032028,
      "grad_norm": 0.3101906180381775,
      "learning_rate": 4.723618090452262e-05,
      "loss": 0.9641,
      "step": 765
    },
    {
      "epoch": 0.3407473309608541,
      "grad_norm": 0.3647597134113312,
      "learning_rate": 4.703517587939698e-05,
      "loss": 0.9678,
      "step": 766
    },
    {
      "epoch": 0.34119217081850534,
      "grad_norm": 0.40179431438446045,
      "learning_rate": 4.6834170854271356e-05,
      "loss": 0.9187,
      "step": 767
    },
    {
      "epoch": 0.3416370106761566,
      "grad_norm": 0.41022738814353943,
      "learning_rate": 4.663316582914573e-05,
      "loss": 0.9218,
      "step": 768
    },
    {
      "epoch": 0.34208185053380785,
      "grad_norm": 0.3737107515335083,
      "learning_rate": 4.64321608040201e-05,
      "loss": 0.9396,
      "step": 769
    },
    {
      "epoch": 0.34252669039145905,
      "grad_norm": 0.34264299273490906,
      "learning_rate": 4.6231155778894475e-05,
      "loss": 0.8803,
      "step": 770
    },
    {
      "epoch": 0.3429715302491103,
      "grad_norm": 0.363738089799881,
      "learning_rate": 4.603015075376885e-05,
      "loss": 0.9905,
      "step": 771
    },
    {
      "epoch": 0.34341637010676157,
      "grad_norm": 0.4605758488178253,
      "learning_rate": 4.582914572864322e-05,
      "loss": 0.904,
      "step": 772
    },
    {
      "epoch": 0.34386120996441283,
      "grad_norm": 0.3742416799068451,
      "learning_rate": 4.5628140703517594e-05,
      "loss": 0.9772,
      "step": 773
    },
    {
      "epoch": 0.34430604982206403,
      "grad_norm": 0.4309648871421814,
      "learning_rate": 4.542713567839196e-05,
      "loss": 1.0439,
      "step": 774
    },
    {
      "epoch": 0.3447508896797153,
      "grad_norm": 0.3657279908657074,
      "learning_rate": 4.522613065326633e-05,
      "loss": 1.0162,
      "step": 775
    },
    {
      "epoch": 0.34519572953736655,
      "grad_norm": 0.4098432958126068,
      "learning_rate": 4.5025125628140706e-05,
      "loss": 0.9788,
      "step": 776
    },
    {
      "epoch": 0.3456405693950178,
      "grad_norm": 0.4177950918674469,
      "learning_rate": 4.482412060301508e-05,
      "loss": 1.0105,
      "step": 777
    },
    {
      "epoch": 0.34608540925266906,
      "grad_norm": 0.35204648971557617,
      "learning_rate": 4.462311557788945e-05,
      "loss": 0.9713,
      "step": 778
    },
    {
      "epoch": 0.34653024911032027,
      "grad_norm": 0.3725673258304596,
      "learning_rate": 4.4422110552763825e-05,
      "loss": 0.9843,
      "step": 779
    },
    {
      "epoch": 0.3469750889679715,
      "grad_norm": 0.38592076301574707,
      "learning_rate": 4.42211055276382e-05,
      "loss": 0.8528,
      "step": 780
    },
    {
      "epoch": 0.3474199288256228,
      "grad_norm": 0.35379818081855774,
      "learning_rate": 4.4020100502512564e-05,
      "loss": 0.9712,
      "step": 781
    },
    {
      "epoch": 0.34786476868327404,
      "grad_norm": 0.3713836371898651,
      "learning_rate": 4.381909547738694e-05,
      "loss": 0.9531,
      "step": 782
    },
    {
      "epoch": 0.34830960854092524,
      "grad_norm": 0.3799002766609192,
      "learning_rate": 4.3618090452261303e-05,
      "loss": 0.9348,
      "step": 783
    },
    {
      "epoch": 0.3487544483985765,
      "grad_norm": 0.34504106640815735,
      "learning_rate": 4.3417085427135676e-05,
      "loss": 0.8863,
      "step": 784
    },
    {
      "epoch": 0.34919928825622776,
      "grad_norm": 0.2971758544445038,
      "learning_rate": 4.321608040201005e-05,
      "loss": 0.8983,
      "step": 785
    },
    {
      "epoch": 0.349644128113879,
      "grad_norm": 0.34722504019737244,
      "learning_rate": 4.301507537688442e-05,
      "loss": 0.9456,
      "step": 786
    },
    {
      "epoch": 0.3500889679715303,
      "grad_norm": 0.3370276987552643,
      "learning_rate": 4.2814070351758795e-05,
      "loss": 0.9288,
      "step": 787
    },
    {
      "epoch": 0.3505338078291815,
      "grad_norm": 0.3471807837486267,
      "learning_rate": 4.261306532663317e-05,
      "loss": 0.98,
      "step": 788
    },
    {
      "epoch": 0.35097864768683273,
      "grad_norm": 0.3932972252368927,
      "learning_rate": 4.241206030150754e-05,
      "loss": 0.9479,
      "step": 789
    },
    {
      "epoch": 0.351423487544484,
      "grad_norm": 0.36573097109794617,
      "learning_rate": 4.2211055276381914e-05,
      "loss": 0.9339,
      "step": 790
    },
    {
      "epoch": 0.35186832740213525,
      "grad_norm": 0.34445399045944214,
      "learning_rate": 4.201005025125628e-05,
      "loss": 0.807,
      "step": 791
    },
    {
      "epoch": 0.35231316725978645,
      "grad_norm": 0.39604124426841736,
      "learning_rate": 4.180904522613065e-05,
      "loss": 0.9503,
      "step": 792
    },
    {
      "epoch": 0.3527580071174377,
      "grad_norm": 0.3157517910003662,
      "learning_rate": 4.1608040201005026e-05,
      "loss": 0.9381,
      "step": 793
    },
    {
      "epoch": 0.35320284697508897,
      "grad_norm": 0.3242207467556,
      "learning_rate": 4.14070351758794e-05,
      "loss": 0.934,
      "step": 794
    },
    {
      "epoch": 0.3536476868327402,
      "grad_norm": 0.3632892966270447,
      "learning_rate": 4.120603015075377e-05,
      "loss": 0.9653,
      "step": 795
    },
    {
      "epoch": 0.3540925266903915,
      "grad_norm": 0.35393667221069336,
      "learning_rate": 4.1005025125628145e-05,
      "loss": 0.9132,
      "step": 796
    },
    {
      "epoch": 0.3545373665480427,
      "grad_norm": 0.36036303639411926,
      "learning_rate": 4.080402010050252e-05,
      "loss": 1.0329,
      "step": 797
    },
    {
      "epoch": 0.35498220640569395,
      "grad_norm": 0.3710475265979767,
      "learning_rate": 4.060301507537689e-05,
      "loss": 0.8732,
      "step": 798
    },
    {
      "epoch": 0.3554270462633452,
      "grad_norm": 0.37702277302742004,
      "learning_rate": 4.040201005025126e-05,
      "loss": 0.9467,
      "step": 799
    },
    {
      "epoch": 0.35587188612099646,
      "grad_norm": 0.3579627275466919,
      "learning_rate": 4.020100502512563e-05,
      "loss": 0.9603,
      "step": 800
    },
    {
      "epoch": 0.35631672597864766,
      "grad_norm": 0.44332355260849,
      "learning_rate": 4e-05,
      "loss": 0.9611,
      "step": 801
    },
    {
      "epoch": 0.3567615658362989,
      "grad_norm": 0.30799567699432373,
      "learning_rate": 3.9798994974874376e-05,
      "loss": 0.9908,
      "step": 802
    },
    {
      "epoch": 0.3572064056939502,
      "grad_norm": 0.38729211688041687,
      "learning_rate": 3.959798994974875e-05,
      "loss": 0.9629,
      "step": 803
    },
    {
      "epoch": 0.35765124555160144,
      "grad_norm": 0.4034986197948456,
      "learning_rate": 3.9396984924623115e-05,
      "loss": 0.9545,
      "step": 804
    },
    {
      "epoch": 0.3580960854092527,
      "grad_norm": 0.35203176736831665,
      "learning_rate": 3.919597989949749e-05,
      "loss": 0.8863,
      "step": 805
    },
    {
      "epoch": 0.3585409252669039,
      "grad_norm": 0.3381657004356384,
      "learning_rate": 3.899497487437186e-05,
      "loss": 0.9144,
      "step": 806
    },
    {
      "epoch": 0.35898576512455516,
      "grad_norm": 0.34250691533088684,
      "learning_rate": 3.8793969849246234e-05,
      "loss": 0.8687,
      "step": 807
    },
    {
      "epoch": 0.3594306049822064,
      "grad_norm": 0.3308602273464203,
      "learning_rate": 3.85929648241206e-05,
      "loss": 0.9292,
      "step": 808
    },
    {
      "epoch": 0.35987544483985767,
      "grad_norm": 0.3888717591762543,
      "learning_rate": 3.8391959798994973e-05,
      "loss": 0.9774,
      "step": 809
    },
    {
      "epoch": 0.3603202846975089,
      "grad_norm": 0.37949973344802856,
      "learning_rate": 3.8190954773869346e-05,
      "loss": 0.9278,
      "step": 810
    },
    {
      "epoch": 0.36076512455516013,
      "grad_norm": 0.3511112332344055,
      "learning_rate": 3.798994974874372e-05,
      "loss": 0.9714,
      "step": 811
    },
    {
      "epoch": 0.3612099644128114,
      "grad_norm": 0.3813224732875824,
      "learning_rate": 3.778894472361809e-05,
      "loss": 1.0487,
      "step": 812
    },
    {
      "epoch": 0.36165480427046265,
      "grad_norm": 0.4005330502986908,
      "learning_rate": 3.7587939698492465e-05,
      "loss": 0.9673,
      "step": 813
    },
    {
      "epoch": 0.3620996441281139,
      "grad_norm": 0.3238542675971985,
      "learning_rate": 3.738693467336684e-05,
      "loss": 0.9409,
      "step": 814
    },
    {
      "epoch": 0.3625444839857651,
      "grad_norm": 0.29547253251075745,
      "learning_rate": 3.7185929648241204e-05,
      "loss": 0.9494,
      "step": 815
    },
    {
      "epoch": 0.36298932384341637,
      "grad_norm": 0.45984339714050293,
      "learning_rate": 3.698492462311558e-05,
      "loss": 0.8507,
      "step": 816
    },
    {
      "epoch": 0.3634341637010676,
      "grad_norm": 0.3637581169605255,
      "learning_rate": 3.678391959798995e-05,
      "loss": 1.0145,
      "step": 817
    },
    {
      "epoch": 0.3638790035587189,
      "grad_norm": 0.3606102764606476,
      "learning_rate": 3.658291457286432e-05,
      "loss": 0.877,
      "step": 818
    },
    {
      "epoch": 0.3643238434163701,
      "grad_norm": 0.33879634737968445,
      "learning_rate": 3.6381909547738696e-05,
      "loss": 0.9165,
      "step": 819
    },
    {
      "epoch": 0.36476868327402134,
      "grad_norm": 0.3897345960140228,
      "learning_rate": 3.618090452261307e-05,
      "loss": 0.9762,
      "step": 820
    },
    {
      "epoch": 0.3652135231316726,
      "grad_norm": 0.39814460277557373,
      "learning_rate": 3.597989949748744e-05,
      "loss": 0.9111,
      "step": 821
    },
    {
      "epoch": 0.36565836298932386,
      "grad_norm": 0.40529152750968933,
      "learning_rate": 3.5778894472361815e-05,
      "loss": 0.8778,
      "step": 822
    },
    {
      "epoch": 0.36610320284697506,
      "grad_norm": 0.34325361251831055,
      "learning_rate": 3.557788944723618e-05,
      "loss": 0.903,
      "step": 823
    },
    {
      "epoch": 0.3665480427046263,
      "grad_norm": 0.36211341619491577,
      "learning_rate": 3.5376884422110554e-05,
      "loss": 0.963,
      "step": 824
    },
    {
      "epoch": 0.3669928825622776,
      "grad_norm": 0.4107413589954376,
      "learning_rate": 3.517587939698493e-05,
      "loss": 0.9007,
      "step": 825
    },
    {
      "epoch": 0.36743772241992884,
      "grad_norm": 0.35432523488998413,
      "learning_rate": 3.49748743718593e-05,
      "loss": 0.8406,
      "step": 826
    },
    {
      "epoch": 0.3678825622775801,
      "grad_norm": 0.4027344286441803,
      "learning_rate": 3.4773869346733667e-05,
      "loss": 1.0062,
      "step": 827
    },
    {
      "epoch": 0.3683274021352313,
      "grad_norm": 0.34778210520744324,
      "learning_rate": 3.457286432160804e-05,
      "loss": 1.0124,
      "step": 828
    },
    {
      "epoch": 0.36877224199288255,
      "grad_norm": 0.3145458996295929,
      "learning_rate": 3.437185929648241e-05,
      "loss": 0.8455,
      "step": 829
    },
    {
      "epoch": 0.3692170818505338,
      "grad_norm": 0.39053332805633545,
      "learning_rate": 3.4170854271356785e-05,
      "loss": 0.9635,
      "step": 830
    },
    {
      "epoch": 0.36966192170818507,
      "grad_norm": 0.3456934988498688,
      "learning_rate": 3.396984924623116e-05,
      "loss": 0.9399,
      "step": 831
    },
    {
      "epoch": 0.3701067615658363,
      "grad_norm": 0.34614813327789307,
      "learning_rate": 3.3768844221105525e-05,
      "loss": 0.9724,
      "step": 832
    },
    {
      "epoch": 0.37055160142348753,
      "grad_norm": 0.45311570167541504,
      "learning_rate": 3.35678391959799e-05,
      "loss": 1.0435,
      "step": 833
    },
    {
      "epoch": 0.3709964412811388,
      "grad_norm": 0.3952670395374298,
      "learning_rate": 3.336683417085427e-05,
      "loss": 0.9251,
      "step": 834
    },
    {
      "epoch": 0.37144128113879005,
      "grad_norm": 0.3246530592441559,
      "learning_rate": 3.3165829145728643e-05,
      "loss": 0.9194,
      "step": 835
    },
    {
      "epoch": 0.3718861209964413,
      "grad_norm": 0.3489208221435547,
      "learning_rate": 3.2964824120603016e-05,
      "loss": 0.9522,
      "step": 836
    },
    {
      "epoch": 0.3723309608540925,
      "grad_norm": 0.3140431046485901,
      "learning_rate": 3.276381909547739e-05,
      "loss": 0.9511,
      "step": 837
    },
    {
      "epoch": 0.37277580071174377,
      "grad_norm": 0.3103282153606415,
      "learning_rate": 3.256281407035176e-05,
      "loss": 0.8739,
      "step": 838
    },
    {
      "epoch": 0.373220640569395,
      "grad_norm": 0.3995440602302551,
      "learning_rate": 3.2361809045226135e-05,
      "loss": 0.9918,
      "step": 839
    },
    {
      "epoch": 0.3736654804270463,
      "grad_norm": 0.39125266671180725,
      "learning_rate": 3.21608040201005e-05,
      "loss": 0.9751,
      "step": 840
    },
    {
      "epoch": 0.3741103202846975,
      "grad_norm": 0.3043217658996582,
      "learning_rate": 3.1959798994974875e-05,
      "loss": 0.8899,
      "step": 841
    },
    {
      "epoch": 0.37455516014234874,
      "grad_norm": 0.4185977280139923,
      "learning_rate": 3.175879396984925e-05,
      "loss": 0.9169,
      "step": 842
    },
    {
      "epoch": 0.375,
      "grad_norm": 0.3753814697265625,
      "learning_rate": 3.155778894472362e-05,
      "loss": 0.951,
      "step": 843
    },
    {
      "epoch": 0.37544483985765126,
      "grad_norm": 0.33144545555114746,
      "learning_rate": 3.1356783919597993e-05,
      "loss": 0.9228,
      "step": 844
    },
    {
      "epoch": 0.3758896797153025,
      "grad_norm": 0.3024055063724518,
      "learning_rate": 3.1155778894472366e-05,
      "loss": 0.9157,
      "step": 845
    },
    {
      "epoch": 0.3763345195729537,
      "grad_norm": 0.3643604815006256,
      "learning_rate": 3.095477386934674e-05,
      "loss": 0.9387,
      "step": 846
    },
    {
      "epoch": 0.376779359430605,
      "grad_norm": 0.426024854183197,
      "learning_rate": 3.075376884422111e-05,
      "loss": 1.0013,
      "step": 847
    },
    {
      "epoch": 0.37722419928825623,
      "grad_norm": 0.3174848258495331,
      "learning_rate": 3.055276381909548e-05,
      "loss": 0.914,
      "step": 848
    },
    {
      "epoch": 0.3776690391459075,
      "grad_norm": 0.3318672478199005,
      "learning_rate": 3.0351758793969855e-05,
      "loss": 0.9502,
      "step": 849
    },
    {
      "epoch": 0.3781138790035587,
      "grad_norm": 0.4423961937427521,
      "learning_rate": 3.015075376884422e-05,
      "loss": 0.9341,
      "step": 850
    },
    {
      "epoch": 0.37855871886120995,
      "grad_norm": 0.40427151322364807,
      "learning_rate": 2.994974874371859e-05,
      "loss": 0.8887,
      "step": 851
    },
    {
      "epoch": 0.3790035587188612,
      "grad_norm": 0.3664209246635437,
      "learning_rate": 2.9748743718592964e-05,
      "loss": 0.8858,
      "step": 852
    },
    {
      "epoch": 0.37944839857651247,
      "grad_norm": 0.3712497353553772,
      "learning_rate": 2.9547738693467337e-05,
      "loss": 0.8004,
      "step": 853
    },
    {
      "epoch": 0.3798932384341637,
      "grad_norm": 0.4214048981666565,
      "learning_rate": 2.934673366834171e-05,
      "loss": 1.0013,
      "step": 854
    },
    {
      "epoch": 0.38033807829181493,
      "grad_norm": 0.3496900498867035,
      "learning_rate": 2.914572864321608e-05,
      "loss": 0.908,
      "step": 855
    },
    {
      "epoch": 0.3807829181494662,
      "grad_norm": 0.4434766471385956,
      "learning_rate": 2.8944723618090452e-05,
      "loss": 0.9394,
      "step": 856
    },
    {
      "epoch": 0.38122775800711745,
      "grad_norm": 0.37649184465408325,
      "learning_rate": 2.8743718592964825e-05,
      "loss": 0.9892,
      "step": 857
    },
    {
      "epoch": 0.3816725978647687,
      "grad_norm": 0.35058286786079407,
      "learning_rate": 2.8542713567839198e-05,
      "loss": 0.9576,
      "step": 858
    },
    {
      "epoch": 0.3821174377224199,
      "grad_norm": 0.3940383493900299,
      "learning_rate": 2.8341708542713568e-05,
      "loss": 0.9078,
      "step": 859
    },
    {
      "epoch": 0.38256227758007116,
      "grad_norm": 0.31289801001548767,
      "learning_rate": 2.814070351758794e-05,
      "loss": 0.8471,
      "step": 860
    },
    {
      "epoch": 0.3830071174377224,
      "grad_norm": 0.3773019313812256,
      "learning_rate": 2.7939698492462314e-05,
      "loss": 1.0291,
      "step": 861
    },
    {
      "epoch": 0.3834519572953737,
      "grad_norm": 0.38689449429512024,
      "learning_rate": 2.7738693467336686e-05,
      "loss": 0.965,
      "step": 862
    },
    {
      "epoch": 0.38389679715302494,
      "grad_norm": 0.4001306891441345,
      "learning_rate": 2.7537688442211056e-05,
      "loss": 0.9251,
      "step": 863
    },
    {
      "epoch": 0.38434163701067614,
      "grad_norm": 0.2970896363258362,
      "learning_rate": 2.733668341708543e-05,
      "loss": 0.8795,
      "step": 864
    },
    {
      "epoch": 0.3847864768683274,
      "grad_norm": 0.3230406939983368,
      "learning_rate": 2.7135678391959802e-05,
      "loss": 0.9162,
      "step": 865
    },
    {
      "epoch": 0.38523131672597866,
      "grad_norm": 0.38683274388313293,
      "learning_rate": 2.6934673366834175e-05,
      "loss": 0.921,
      "step": 866
    },
    {
      "epoch": 0.3856761565836299,
      "grad_norm": 0.36258265376091003,
      "learning_rate": 2.6733668341708545e-05,
      "loss": 0.9282,
      "step": 867
    },
    {
      "epoch": 0.3861209964412811,
      "grad_norm": 0.30888402462005615,
      "learning_rate": 2.6532663316582917e-05,
      "loss": 0.9154,
      "step": 868
    },
    {
      "epoch": 0.3865658362989324,
      "grad_norm": 0.4258480370044708,
      "learning_rate": 2.633165829145729e-05,
      "loss": 0.9405,
      "step": 869
    },
    {
      "epoch": 0.38701067615658363,
      "grad_norm": 0.33099818229675293,
      "learning_rate": 2.613065326633166e-05,
      "loss": 0.923,
      "step": 870
    },
    {
      "epoch": 0.3874555160142349,
      "grad_norm": 0.3900362253189087,
      "learning_rate": 2.5929648241206033e-05,
      "loss": 0.8813,
      "step": 871
    },
    {
      "epoch": 0.3879003558718861,
      "grad_norm": 0.3388621509075165,
      "learning_rate": 2.5728643216080406e-05,
      "loss": 0.9765,
      "step": 872
    },
    {
      "epoch": 0.38834519572953735,
      "grad_norm": 0.32633256912231445,
      "learning_rate": 2.5527638190954772e-05,
      "loss": 0.8824,
      "step": 873
    },
    {
      "epoch": 0.3887900355871886,
      "grad_norm": 0.34141066670417786,
      "learning_rate": 2.5326633165829145e-05,
      "loss": 0.862,
      "step": 874
    },
    {
      "epoch": 0.38923487544483987,
      "grad_norm": 0.39966803789138794,
      "learning_rate": 2.5125628140703518e-05,
      "loss": 0.9406,
      "step": 875
    },
    {
      "epoch": 0.3896797153024911,
      "grad_norm": 0.34703922271728516,
      "learning_rate": 2.492462311557789e-05,
      "loss": 0.9392,
      "step": 876
    },
    {
      "epoch": 0.39012455516014233,
      "grad_norm": 0.3692745268344879,
      "learning_rate": 2.4723618090452264e-05,
      "loss": 0.838,
      "step": 877
    },
    {
      "epoch": 0.3905693950177936,
      "grad_norm": 0.3813033401966095,
      "learning_rate": 2.4522613065326637e-05,
      "loss": 0.9557,
      "step": 878
    },
    {
      "epoch": 0.39101423487544484,
      "grad_norm": 0.361794650554657,
      "learning_rate": 2.4321608040201007e-05,
      "loss": 0.7851,
      "step": 879
    },
    {
      "epoch": 0.3914590747330961,
      "grad_norm": 0.3740581274032593,
      "learning_rate": 2.4120603015075376e-05,
      "loss": 1.0353,
      "step": 880
    },
    {
      "epoch": 0.3919039145907473,
      "grad_norm": 0.3482462465763092,
      "learning_rate": 2.391959798994975e-05,
      "loss": 0.9339,
      "step": 881
    },
    {
      "epoch": 0.39234875444839856,
      "grad_norm": 0.35234349966049194,
      "learning_rate": 2.3718592964824122e-05,
      "loss": 0.8892,
      "step": 882
    },
    {
      "epoch": 0.3927935943060498,
      "grad_norm": 0.349231481552124,
      "learning_rate": 2.351758793969849e-05,
      "loss": 0.9183,
      "step": 883
    },
    {
      "epoch": 0.3932384341637011,
      "grad_norm": 0.3892892599105835,
      "learning_rate": 2.3316582914572865e-05,
      "loss": 1.0786,
      "step": 884
    },
    {
      "epoch": 0.39368327402135234,
      "grad_norm": 0.4677392840385437,
      "learning_rate": 2.3115577889447238e-05,
      "loss": 0.9138,
      "step": 885
    },
    {
      "epoch": 0.39412811387900354,
      "grad_norm": 0.32070422172546387,
      "learning_rate": 2.291457286432161e-05,
      "loss": 0.9076,
      "step": 886
    },
    {
      "epoch": 0.3945729537366548,
      "grad_norm": 0.30801644921302795,
      "learning_rate": 2.271356783919598e-05,
      "loss": 0.9457,
      "step": 887
    },
    {
      "epoch": 0.39501779359430605,
      "grad_norm": 0.4170681834220886,
      "learning_rate": 2.2512562814070353e-05,
      "loss": 0.9452,
      "step": 888
    },
    {
      "epoch": 0.3954626334519573,
      "grad_norm": 0.44136953353881836,
      "learning_rate": 2.2311557788944726e-05,
      "loss": 1.1165,
      "step": 889
    },
    {
      "epoch": 0.3959074733096085,
      "grad_norm": 0.35362014174461365,
      "learning_rate": 2.21105527638191e-05,
      "loss": 0.9155,
      "step": 890
    },
    {
      "epoch": 0.3963523131672598,
      "grad_norm": 0.3813976049423218,
      "learning_rate": 2.190954773869347e-05,
      "loss": 0.8541,
      "step": 891
    },
    {
      "epoch": 0.39679715302491103,
      "grad_norm": 0.41842445731163025,
      "learning_rate": 2.1708542713567838e-05,
      "loss": 0.9675,
      "step": 892
    },
    {
      "epoch": 0.3972419928825623,
      "grad_norm": 0.3513477146625519,
      "learning_rate": 2.150753768844221e-05,
      "loss": 0.9387,
      "step": 893
    },
    {
      "epoch": 0.39768683274021355,
      "grad_norm": 0.3313136398792267,
      "learning_rate": 2.1306532663316584e-05,
      "loss": 0.9186,
      "step": 894
    },
    {
      "epoch": 0.39813167259786475,
      "grad_norm": 0.370280921459198,
      "learning_rate": 2.1105527638190957e-05,
      "loss": 1.1071,
      "step": 895
    },
    {
      "epoch": 0.398576512455516,
      "grad_norm": 0.3955540657043457,
      "learning_rate": 2.0904522613065327e-05,
      "loss": 0.9314,
      "step": 896
    },
    {
      "epoch": 0.39902135231316727,
      "grad_norm": 0.394826203584671,
      "learning_rate": 2.07035175879397e-05,
      "loss": 0.9315,
      "step": 897
    },
    {
      "epoch": 0.3994661921708185,
      "grad_norm": 0.35463854670524597,
      "learning_rate": 2.0502512562814073e-05,
      "loss": 0.9011,
      "step": 898
    },
    {
      "epoch": 0.3999110320284697,
      "grad_norm": 0.3725610673427582,
      "learning_rate": 2.0301507537688446e-05,
      "loss": 0.9455,
      "step": 899
    },
    {
      "epoch": 0.400355871886121,
      "grad_norm": 0.4204149842262268,
      "learning_rate": 2.0100502512562815e-05,
      "loss": 0.973,
      "step": 900
    },
    {
      "epoch": 0.40080071174377224,
      "grad_norm": 0.312836617231369,
      "learning_rate": 1.9899497487437188e-05,
      "loss": 0.875,
      "step": 901
    },
    {
      "epoch": 0.4012455516014235,
      "grad_norm": 0.4284355044364929,
      "learning_rate": 1.9698492462311558e-05,
      "loss": 1.0345,
      "step": 902
    },
    {
      "epoch": 0.40169039145907476,
      "grad_norm": 0.41245731711387634,
      "learning_rate": 1.949748743718593e-05,
      "loss": 0.884,
      "step": 903
    },
    {
      "epoch": 0.40213523131672596,
      "grad_norm": 0.31140172481536865,
      "learning_rate": 1.92964824120603e-05,
      "loss": 0.8928,
      "step": 904
    },
    {
      "epoch": 0.4025800711743772,
      "grad_norm": 0.4398500323295593,
      "learning_rate": 1.9095477386934673e-05,
      "loss": 0.9624,
      "step": 905
    },
    {
      "epoch": 0.4030249110320285,
      "grad_norm": 0.3438228964805603,
      "learning_rate": 1.8894472361809046e-05,
      "loss": 0.8849,
      "step": 906
    },
    {
      "epoch": 0.40346975088967973,
      "grad_norm": 0.3663855195045471,
      "learning_rate": 1.869346733668342e-05,
      "loss": 0.8945,
      "step": 907
    },
    {
      "epoch": 0.40391459074733094,
      "grad_norm": 0.3942212164402008,
      "learning_rate": 1.849246231155779e-05,
      "loss": 0.8601,
      "step": 908
    },
    {
      "epoch": 0.4043594306049822,
      "grad_norm": 0.3836296498775482,
      "learning_rate": 1.829145728643216e-05,
      "loss": 1.0224,
      "step": 909
    },
    {
      "epoch": 0.40480427046263345,
      "grad_norm": 0.41282856464385986,
      "learning_rate": 1.8090452261306535e-05,
      "loss": 0.9005,
      "step": 910
    },
    {
      "epoch": 0.4052491103202847,
      "grad_norm": 0.3127693235874176,
      "learning_rate": 1.7889447236180908e-05,
      "loss": 0.8051,
      "step": 911
    },
    {
      "epoch": 0.40569395017793597,
      "grad_norm": 0.2867036759853363,
      "learning_rate": 1.7688442211055277e-05,
      "loss": 1.0131,
      "step": 912
    },
    {
      "epoch": 0.40613879003558717,
      "grad_norm": 0.28900963068008423,
      "learning_rate": 1.748743718592965e-05,
      "loss": 0.8713,
      "step": 913
    },
    {
      "epoch": 0.40658362989323843,
      "grad_norm": 0.39211320877075195,
      "learning_rate": 1.728643216080402e-05,
      "loss": 0.9688,
      "step": 914
    },
    {
      "epoch": 0.4070284697508897,
      "grad_norm": 0.39720427989959717,
      "learning_rate": 1.7085427135678393e-05,
      "loss": 0.9303,
      "step": 915
    },
    {
      "epoch": 0.40747330960854095,
      "grad_norm": 0.3245285749435425,
      "learning_rate": 1.6884422110552762e-05,
      "loss": 0.8772,
      "step": 916
    },
    {
      "epoch": 0.40791814946619215,
      "grad_norm": 0.3102715015411377,
      "learning_rate": 1.6683417085427135e-05,
      "loss": 0.8974,
      "step": 917
    },
    {
      "epoch": 0.4083629893238434,
      "grad_norm": 0.34904980659484863,
      "learning_rate": 1.6482412060301508e-05,
      "loss": 0.9458,
      "step": 918
    },
    {
      "epoch": 0.40880782918149466,
      "grad_norm": 0.3596024215221405,
      "learning_rate": 1.628140703517588e-05,
      "loss": 0.9627,
      "step": 919
    },
    {
      "epoch": 0.4092526690391459,
      "grad_norm": 0.37325575947761536,
      "learning_rate": 1.608040201005025e-05,
      "loss": 0.7928,
      "step": 920
    },
    {
      "epoch": 0.4096975088967972,
      "grad_norm": 0.3483446538448334,
      "learning_rate": 1.5879396984924624e-05,
      "loss": 0.9163,
      "step": 921
    },
    {
      "epoch": 0.4101423487544484,
      "grad_norm": 0.3493581712245941,
      "learning_rate": 1.5678391959798997e-05,
      "loss": 0.9661,
      "step": 922
    },
    {
      "epoch": 0.41058718861209964,
      "grad_norm": 0.3486252725124359,
      "learning_rate": 1.547738693467337e-05,
      "loss": 0.9236,
      "step": 923
    },
    {
      "epoch": 0.4110320284697509,
      "grad_norm": 0.40376630425453186,
      "learning_rate": 1.527638190954774e-05,
      "loss": 0.9523,
      "step": 924
    },
    {
      "epoch": 0.41147686832740216,
      "grad_norm": 0.39947953820228577,
      "learning_rate": 1.507537688442211e-05,
      "loss": 0.9814,
      "step": 925
    },
    {
      "epoch": 0.41192170818505336,
      "grad_norm": 0.3233617842197418,
      "learning_rate": 1.4874371859296482e-05,
      "loss": 0.9911,
      "step": 926
    },
    {
      "epoch": 0.4123665480427046,
      "grad_norm": 0.3426443040370941,
      "learning_rate": 1.4673366834170855e-05,
      "loss": 1.0236,
      "step": 927
    },
    {
      "epoch": 0.4128113879003559,
      "grad_norm": 0.3716176450252533,
      "learning_rate": 1.4472361809045226e-05,
      "loss": 0.9969,
      "step": 928
    },
    {
      "epoch": 0.41325622775800713,
      "grad_norm": 0.37307268381118774,
      "learning_rate": 1.4271356783919599e-05,
      "loss": 0.9022,
      "step": 929
    },
    {
      "epoch": 0.41370106761565834,
      "grad_norm": 0.31498992443084717,
      "learning_rate": 1.407035175879397e-05,
      "loss": 0.9017,
      "step": 930
    },
    {
      "epoch": 0.4141459074733096,
      "grad_norm": 0.3574257493019104,
      "learning_rate": 1.3869346733668343e-05,
      "loss": 1.0195,
      "step": 931
    },
    {
      "epoch": 0.41459074733096085,
      "grad_norm": 0.3232157230377197,
      "learning_rate": 1.3668341708542715e-05,
      "loss": 0.8746,
      "step": 932
    },
    {
      "epoch": 0.4150355871886121,
      "grad_norm": 0.3905941843986511,
      "learning_rate": 1.3467336683417087e-05,
      "loss": 0.8746,
      "step": 933
    },
    {
      "epoch": 0.41548042704626337,
      "grad_norm": 0.3263537883758545,
      "learning_rate": 1.3266331658291459e-05,
      "loss": 0.8922,
      "step": 934
    },
    {
      "epoch": 0.41592526690391457,
      "grad_norm": 0.3259488642215729,
      "learning_rate": 1.306532663316583e-05,
      "loss": 0.9866,
      "step": 935
    },
    {
      "epoch": 0.41637010676156583,
      "grad_norm": 0.4093850255012512,
      "learning_rate": 1.2864321608040203e-05,
      "loss": 0.8843,
      "step": 936
    },
    {
      "epoch": 0.4168149466192171,
      "grad_norm": 0.3108372390270233,
      "learning_rate": 1.2663316582914573e-05,
      "loss": 0.9248,
      "step": 937
    },
    {
      "epoch": 0.41725978647686834,
      "grad_norm": 0.3368922770023346,
      "learning_rate": 1.2462311557788946e-05,
      "loss": 0.9424,
      "step": 938
    },
    {
      "epoch": 0.41770462633451955,
      "grad_norm": 0.35581493377685547,
      "learning_rate": 1.2261306532663318e-05,
      "loss": 0.9238,
      "step": 939
    },
    {
      "epoch": 0.4181494661921708,
      "grad_norm": 0.392605185508728,
      "learning_rate": 1.2060301507537688e-05,
      "loss": 0.9718,
      "step": 940
    },
    {
      "epoch": 0.41859430604982206,
      "grad_norm": 0.33466875553131104,
      "learning_rate": 1.1859296482412061e-05,
      "loss": 0.8881,
      "step": 941
    },
    {
      "epoch": 0.4190391459074733,
      "grad_norm": 0.4258723258972168,
      "learning_rate": 1.1658291457286432e-05,
      "loss": 0.9908,
      "step": 942
    },
    {
      "epoch": 0.4194839857651246,
      "grad_norm": 0.3950963318347931,
      "learning_rate": 1.1457286432160805e-05,
      "loss": 0.8359,
      "step": 943
    },
    {
      "epoch": 0.4199288256227758,
      "grad_norm": 0.35699567198753357,
      "learning_rate": 1.1256281407035177e-05,
      "loss": 0.9785,
      "step": 944
    },
    {
      "epoch": 0.42037366548042704,
      "grad_norm": 0.3818075954914093,
      "learning_rate": 1.105527638190955e-05,
      "loss": 0.9927,
      "step": 945
    },
    {
      "epoch": 0.4208185053380783,
      "grad_norm": 0.3606509566307068,
      "learning_rate": 1.0854271356783919e-05,
      "loss": 0.8477,
      "step": 946
    },
    {
      "epoch": 0.42126334519572955,
      "grad_norm": 0.3209396004676819,
      "learning_rate": 1.0653266331658292e-05,
      "loss": 0.9992,
      "step": 947
    },
    {
      "epoch": 0.42170818505338076,
      "grad_norm": 0.35111239552497864,
      "learning_rate": 1.0452261306532663e-05,
      "loss": 0.8933,
      "step": 948
    },
    {
      "epoch": 0.422153024911032,
      "grad_norm": 0.3492172658443451,
      "learning_rate": 1.0251256281407036e-05,
      "loss": 0.9133,
      "step": 949
    },
    {
      "epoch": 0.4225978647686833,
      "grad_norm": 0.34597399830818176,
      "learning_rate": 1.0050251256281408e-05,
      "loss": 0.8814,
      "step": 950
    },
    {
      "epoch": 0.42304270462633453,
      "grad_norm": 0.35809046030044556,
      "learning_rate": 9.849246231155779e-06,
      "loss": 0.9919,
      "step": 951
    },
    {
      "epoch": 0.4234875444839858,
      "grad_norm": 0.3955031931400299,
      "learning_rate": 9.64824120603015e-06,
      "loss": 0.8611,
      "step": 952
    },
    {
      "epoch": 0.423932384341637,
      "grad_norm": 0.35886260867118835,
      "learning_rate": 9.447236180904523e-06,
      "loss": 0.9025,
      "step": 953
    },
    {
      "epoch": 0.42437722419928825,
      "grad_norm": 0.340167760848999,
      "learning_rate": 9.246231155778894e-06,
      "loss": 0.9633,
      "step": 954
    },
    {
      "epoch": 0.4248220640569395,
      "grad_norm": 0.3560841977596283,
      "learning_rate": 9.045226130653267e-06,
      "loss": 0.9444,
      "step": 955
    },
    {
      "epoch": 0.42526690391459077,
      "grad_norm": 0.39469799399375916,
      "learning_rate": 8.844221105527639e-06,
      "loss": 0.9697,
      "step": 956
    },
    {
      "epoch": 0.42571174377224197,
      "grad_norm": 0.35771381855010986,
      "learning_rate": 8.64321608040201e-06,
      "loss": 0.944,
      "step": 957
    },
    {
      "epoch": 0.4261565836298932,
      "grad_norm": 0.3121163845062256,
      "learning_rate": 8.442211055276381e-06,
      "loss": 0.8582,
      "step": 958
    },
    {
      "epoch": 0.4266014234875445,
      "grad_norm": 0.39604029059410095,
      "learning_rate": 8.241206030150754e-06,
      "loss": 0.9885,
      "step": 959
    },
    {
      "epoch": 0.42704626334519574,
      "grad_norm": 0.35038042068481445,
      "learning_rate": 8.040201005025125e-06,
      "loss": 0.8873,
      "step": 960
    },
    {
      "epoch": 0.427491103202847,
      "grad_norm": 0.39221829175949097,
      "learning_rate": 7.839195979899498e-06,
      "loss": 0.9252,
      "step": 961
    },
    {
      "epoch": 0.4279359430604982,
      "grad_norm": 0.39741194248199463,
      "learning_rate": 7.63819095477387e-06,
      "loss": 0.9643,
      "step": 962
    },
    {
      "epoch": 0.42838078291814946,
      "grad_norm": 0.511210024356842,
      "learning_rate": 7.437185929648241e-06,
      "loss": 0.9651,
      "step": 963
    },
    {
      "epoch": 0.4288256227758007,
      "grad_norm": 0.4568634331226349,
      "learning_rate": 7.236180904522613e-06,
      "loss": 1.0238,
      "step": 964
    },
    {
      "epoch": 0.429270462633452,
      "grad_norm": 0.36889776587486267,
      "learning_rate": 7.035175879396985e-06,
      "loss": 0.964,
      "step": 965
    },
    {
      "epoch": 0.4297153024911032,
      "grad_norm": 0.34706658124923706,
      "learning_rate": 6.834170854271357e-06,
      "loss": 0.9929,
      "step": 966
    },
    {
      "epoch": 0.43016014234875444,
      "grad_norm": 0.33485299348831177,
      "learning_rate": 6.633165829145729e-06,
      "loss": 0.9141,
      "step": 967
    },
    {
      "epoch": 0.4306049822064057,
      "grad_norm": 0.5187619924545288,
      "learning_rate": 6.4321608040201015e-06,
      "loss": 1.084,
      "step": 968
    },
    {
      "epoch": 0.43104982206405695,
      "grad_norm": 0.41246598958969116,
      "learning_rate": 6.231155778894473e-06,
      "loss": 0.9763,
      "step": 969
    },
    {
      "epoch": 0.4314946619217082,
      "grad_norm": 0.35957226157188416,
      "learning_rate": 6.030150753768844e-06,
      "loss": 0.9251,
      "step": 970
    },
    {
      "epoch": 0.4319395017793594,
      "grad_norm": 0.3485277593135834,
      "learning_rate": 5.829145728643216e-06,
      "loss": 0.832,
      "step": 971
    },
    {
      "epoch": 0.43238434163701067,
      "grad_norm": 0.2965943217277527,
      "learning_rate": 5.628140703517588e-06,
      "loss": 0.9496,
      "step": 972
    },
    {
      "epoch": 0.43282918149466193,
      "grad_norm": 0.3268524706363678,
      "learning_rate": 5.4271356783919595e-06,
      "loss": 0.9876,
      "step": 973
    },
    {
      "epoch": 0.4332740213523132,
      "grad_norm": 0.33093732595443726,
      "learning_rate": 5.226130653266332e-06,
      "loss": 0.9083,
      "step": 974
    },
    {
      "epoch": 0.4337188612099644,
      "grad_norm": 0.3334566652774811,
      "learning_rate": 5.025125628140704e-06,
      "loss": 0.8941,
      "step": 975
    },
    {
      "epoch": 0.43416370106761565,
      "grad_norm": 0.327716201543808,
      "learning_rate": 4.824120603015075e-06,
      "loss": 0.9783,
      "step": 976
    },
    {
      "epoch": 0.4346085409252669,
      "grad_norm": 0.4024101793766022,
      "learning_rate": 4.623115577889447e-06,
      "loss": 0.9505,
      "step": 977
    },
    {
      "epoch": 0.43505338078291816,
      "grad_norm": 0.3500884175300598,
      "learning_rate": 4.422110552763819e-06,
      "loss": 0.9397,
      "step": 978
    },
    {
      "epoch": 0.4354982206405694,
      "grad_norm": 0.3603179156780243,
      "learning_rate": 4.2211055276381906e-06,
      "loss": 0.9335,
      "step": 979
    },
    {
      "epoch": 0.4359430604982206,
      "grad_norm": 0.3110332190990448,
      "learning_rate": 4.020100502512563e-06,
      "loss": 0.9589,
      "step": 980
    },
    {
      "epoch": 0.4363879003558719,
      "grad_norm": 0.3665446937084198,
      "learning_rate": 3.819095477386935e-06,
      "loss": 0.9139,
      "step": 981
    },
    {
      "epoch": 0.43683274021352314,
      "grad_norm": 0.30451545119285583,
      "learning_rate": 3.6180904522613065e-06,
      "loss": 0.9229,
      "step": 982
    },
    {
      "epoch": 0.4372775800711744,
      "grad_norm": 0.359427809715271,
      "learning_rate": 3.4170854271356786e-06,
      "loss": 0.9409,
      "step": 983
    },
    {
      "epoch": 0.4377224199288256,
      "grad_norm": 0.29950541257858276,
      "learning_rate": 3.2160804020100507e-06,
      "loss": 0.8438,
      "step": 984
    },
    {
      "epoch": 0.43816725978647686,
      "grad_norm": 0.44397425651550293,
      "learning_rate": 3.015075376884422e-06,
      "loss": 0.9639,
      "step": 985
    },
    {
      "epoch": 0.4386120996441281,
      "grad_norm": 0.3420177400112152,
      "learning_rate": 2.814070351758794e-06,
      "loss": 0.8911,
      "step": 986
    },
    {
      "epoch": 0.4390569395017794,
      "grad_norm": 0.3904622793197632,
      "learning_rate": 2.613065326633166e-06,
      "loss": 0.9407,
      "step": 987
    },
    {
      "epoch": 0.4395017793594306,
      "grad_norm": 0.2967797517776489,
      "learning_rate": 2.4120603015075375e-06,
      "loss": 0.9157,
      "step": 988
    },
    {
      "epoch": 0.43994661921708184,
      "grad_norm": 0.3801586925983429,
      "learning_rate": 2.2110552763819096e-06,
      "loss": 0.8314,
      "step": 989
    },
    {
      "epoch": 0.4403914590747331,
      "grad_norm": 0.3838663101196289,
      "learning_rate": 2.0100502512562813e-06,
      "loss": 0.8797,
      "step": 990
    },
    {
      "epoch": 0.44083629893238435,
      "grad_norm": 0.3395996689796448,
      "learning_rate": 1.8090452261306533e-06,
      "loss": 0.8795,
      "step": 991
    },
    {
      "epoch": 0.4412811387900356,
      "grad_norm": 0.38413113355636597,
      "learning_rate": 1.6080402010050254e-06,
      "loss": 0.9914,
      "step": 992
    },
    {
      "epoch": 0.4417259786476868,
      "grad_norm": 0.32779523730278015,
      "learning_rate": 1.407035175879397e-06,
      "loss": 0.8799,
      "step": 993
    },
    {
      "epoch": 0.44217081850533807,
      "grad_norm": 0.4115554094314575,
      "learning_rate": 1.2060301507537688e-06,
      "loss": 0.8444,
      "step": 994
    },
    {
      "epoch": 0.44261565836298933,
      "grad_norm": 0.3810219168663025,
      "learning_rate": 1.0050251256281407e-06,
      "loss": 1.0453,
      "step": 995
    },
    {
      "epoch": 0.4430604982206406,
      "grad_norm": 0.40686285495758057,
      "learning_rate": 8.040201005025127e-07,
      "loss": 0.8967,
      "step": 996
    },
    {
      "epoch": 0.4435053380782918,
      "grad_norm": 0.3694639801979065,
      "learning_rate": 6.030150753768844e-07,
      "loss": 0.8554,
      "step": 997
    },
    {
      "epoch": 0.44395017793594305,
      "grad_norm": 0.32546064257621765,
      "learning_rate": 4.0201005025125634e-07,
      "loss": 0.8795,
      "step": 998
    },
    {
      "epoch": 0.4443950177935943,
      "grad_norm": 0.3117218613624573,
      "learning_rate": 2.0100502512562817e-07,
      "loss": 0.9495,
      "step": 999
    },
    {
      "epoch": 0.44483985765124556,
      "grad_norm": 0.36267247796058655,
      "learning_rate": 0.0,
      "loss": 0.9526,
      "step": 1000
    }
  ],
  "logging_steps": 1,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.7939011197427712e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}