{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.35008,
  "global_step": 547000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 5e-06,
      "loss": 12.591,
      "step": 100
    },
    {
      "epoch": 0.0,
      "learning_rate": 1e-05,
      "loss": 10.8214,
      "step": 200
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.5e-05,
      "loss": 9.0507,
      "step": 300
    },
    {
      "epoch": 0.0,
      "learning_rate": 2e-05,
      "loss": 7.8913,
      "step": 400
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.5e-05,
      "loss": 7.1015,
      "step": 500
    },
    {
      "epoch": 0.0,
      "learning_rate": 3e-05,
      "loss": 6.6191,
      "step": 600
    },
    {
      "epoch": 0.0,
      "learning_rate": 3.5e-05,
      "loss": 6.1187,
      "step": 700
    },
    {
      "epoch": 0.0,
      "learning_rate": 4e-05,
      "loss": 5.7839,
      "step": 800
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.5e-05,
      "loss": 5.604,
      "step": 900
    },
    {
      "epoch": 0.0,
      "learning_rate": 5e-05,
      "loss": 5.3757,
      "step": 1000
    },
    {
      "epoch": 0.0,
      "eval_loss": 3.8415253162384033,
      "eval_runtime": 113.1916,
      "eval_samples_per_second": 88.346,
      "eval_steps_per_second": 5.522,
      "step": 1000
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.999679795068844e-05,
      "loss": 5.1983,
      "step": 1100
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.999359590137688e-05,
      "loss": 5.0588,
      "step": 1200
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.999039385206533e-05,
      "loss": 4.9587,
      "step": 1300
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.998719180275376e-05,
      "loss": 4.8633,
      "step": 1400
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.998398975344221e-05,
      "loss": 4.754,
      "step": 1500
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.998078770413064e-05,
      "loss": 4.7141,
      "step": 1600
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.997758565481909e-05,
      "loss": 4.6602,
      "step": 1700
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.997438360550753e-05,
      "loss": 4.6326,
      "step": 1800
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.997118155619597e-05,
      "loss": 4.5427,
      "step": 1900
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.996797950688441e-05,
      "loss": 4.5349,
      "step": 2000
    },
    {
      "epoch": 0.0,
      "eval_loss": 3.5511226654052734,
      "eval_runtime": 114.3823,
      "eval_samples_per_second": 87.426,
      "eval_steps_per_second": 5.464,
      "step": 2000
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9964777457572846e-05,
      "loss": 4.4913,
      "step": 2100
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.996157540826129e-05,
      "loss": 4.4802,
      "step": 2200
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9958373358949726e-05,
      "loss": 4.4252,
      "step": 2300
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.995517130963817e-05,
      "loss": 4.4014,
      "step": 2400
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.995196926032661e-05,
      "loss": 4.3816,
      "step": 2500
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.994876721101505e-05,
      "loss": 4.3669,
      "step": 2600
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.994556516170349e-05,
      "loss": 4.3404,
      "step": 2700
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.994236311239193e-05,
      "loss": 4.3198,
      "step": 2800
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.993916106308038e-05,
      "loss": 4.2978,
      "step": 2900
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.993595901376881e-05,
      "loss": 4.2684,
      "step": 3000
    },
    {
      "epoch": 0.0,
      "eval_loss": 3.447725772857666,
      "eval_runtime": 113.9094,
      "eval_samples_per_second": 87.789,
      "eval_steps_per_second": 5.487,
      "step": 3000
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.993275696445726e-05,
      "loss": 4.2808,
      "step": 3100
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.992955491514569e-05,
      "loss": 4.2779,
      "step": 3200
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.992635286583414e-05,
      "loss": 4.2116,
      "step": 3300
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9923150816522576e-05,
      "loss": 4.1717,
      "step": 3400
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9919948767211016e-05,
      "loss": 4.2164,
      "step": 3500
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.991674671789946e-05,
      "loss": 4.1759,
      "step": 3600
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9913544668587896e-05,
      "loss": 4.1909,
      "step": 3700
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.991034261927634e-05,
      "loss": 4.1439,
      "step": 3800
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9907140569964775e-05,
      "loss": 4.1416,
      "step": 3900
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.990393852065322e-05,
      "loss": 4.119,
      "step": 4000
    },
    {
      "epoch": 0.0,
      "eval_loss": 3.3744781017303467,
      "eval_runtime": 111.7317,
      "eval_samples_per_second": 89.5,
      "eval_steps_per_second": 5.594,
      "step": 4000
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.990073647134166e-05,
      "loss": 4.1032,
      "step": 4100
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.98975344220301e-05,
      "loss": 4.131,
      "step": 4200
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.989433237271854e-05,
      "loss": 4.0996,
      "step": 4300
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.989113032340698e-05,
      "loss": 4.0583,
      "step": 4400
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.988792827409543e-05,
      "loss": 4.0848,
      "step": 4500
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.988472622478386e-05,
      "loss": 4.0725,
      "step": 4600
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9881524175472307e-05,
      "loss": 4.0761,
      "step": 4700
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.987832212616074e-05,
      "loss": 4.056,
      "step": 4800
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9875120076849186e-05,
      "loss": 4.0803,
      "step": 4900
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9871918027537626e-05,
      "loss": 4.0537,
      "step": 5000
    },
    {
      "epoch": 0.0,
      "eval_loss": 3.3372604846954346,
      "eval_runtime": 113.7249,
      "eval_samples_per_second": 87.932,
      "eval_steps_per_second": 5.496,
      "step": 5000
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9868715978226066e-05,
      "loss": 4.0445,
      "step": 5100
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.986551392891451e-05,
      "loss": 4.0175,
      "step": 5200
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9862311879602945e-05,
      "loss": 4.0402,
      "step": 5300
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.985910983029139e-05,
      "loss": 4.0238,
      "step": 5400
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9855907780979825e-05,
      "loss": 4.0446,
      "step": 5500
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.985270573166827e-05,
      "loss": 3.9834,
      "step": 5600
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.984950368235671e-05,
      "loss": 3.9877,
      "step": 5700
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.984630163304515e-05,
      "loss": 3.9684,
      "step": 5800
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.984309958373359e-05,
      "loss": 3.9855,
      "step": 5900
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.983989753442203e-05,
      "loss": 3.9619,
      "step": 6000
    },
    {
      "epoch": 0.0,
      "eval_loss": 3.3133883476257324,
      "eval_runtime": 112.7819,
      "eval_samples_per_second": 88.667,
      "eval_steps_per_second": 5.542,
      "step": 6000
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9836695485110477e-05,
      "loss": 3.9845,
      "step": 6100
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.983349343579891e-05,
      "loss": 3.9672,
      "step": 6200
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9830291386487356e-05,
      "loss": 3.9552,
      "step": 6300
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9827089337175796e-05,
      "loss": 3.9424,
      "step": 6400
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9823887287864235e-05,
      "loss": 3.9586,
      "step": 6500
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9820685238552675e-05,
      "loss": 3.9541,
      "step": 6600
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9817483189241115e-05,
      "loss": 3.9439,
      "step": 6700
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.981428113992956e-05,
      "loss": 3.9432,
      "step": 6800
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9811079090617994e-05,
      "loss": 3.9222,
      "step": 6900
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.980787704130644e-05,
      "loss": 3.9466,
      "step": 7000
    },
    {
      "epoch": 0.0,
      "eval_loss": 3.27958083152771,
      "eval_runtime": 111.7284,
      "eval_samples_per_second": 89.503,
      "eval_steps_per_second": 5.594,
      "step": 7000
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.980467499199488e-05,
      "loss": 3.9253,
      "step": 7100
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.980147294268332e-05,
      "loss": 3.9129,
      "step": 7200
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.979827089337176e-05,
      "loss": 3.9032,
      "step": 7300
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.97950688440602e-05,
      "loss": 3.8977,
      "step": 7400
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.979186679474864e-05,
      "loss": 3.8869,
      "step": 7500
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.978866474543708e-05,
      "loss": 3.8907,
      "step": 7600
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9785462696125526e-05,
      "loss": 3.9064,
      "step": 7700
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.978226064681396e-05,
      "loss": 3.8862,
      "step": 7800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9779058597502405e-05,
      "loss": 3.8855,
      "step": 7900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9775856548190845e-05,
      "loss": 3.8971,
      "step": 8000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.255528450012207,
      "eval_runtime": 113.3994,
      "eval_samples_per_second": 88.184,
      "eval_steps_per_second": 5.511,
      "step": 8000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9772654498879285e-05,
      "loss": 3.885,
      "step": 8100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9769452449567725e-05,
      "loss": 3.8537,
      "step": 8200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9766250400256164e-05,
      "loss": 3.8527,
      "step": 8300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.976304835094461e-05,
      "loss": 3.8473,
      "step": 8400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9759846301633044e-05,
      "loss": 3.8552,
      "step": 8500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.975664425232149e-05,
      "loss": 3.8657,
      "step": 8600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.975344220300993e-05,
      "loss": 3.8652,
      "step": 8700
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.975024015369837e-05,
      "loss": 3.8684,
      "step": 8800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.974703810438681e-05,
      "loss": 3.8627,
      "step": 8900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.974383605507525e-05,
      "loss": 3.8417,
      "step": 9000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.239633083343506,
      "eval_runtime": 114.4184,
      "eval_samples_per_second": 87.399,
      "eval_steps_per_second": 5.462,
      "step": 9000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.974063400576369e-05,
      "loss": 3.8382,
      "step": 9100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.973743195645213e-05,
      "loss": 3.8576,
      "step": 9200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9734229907140575e-05,
      "loss": 3.8578,
      "step": 9300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9731027857829015e-05,
      "loss": 3.8359,
      "step": 9400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9727825808517455e-05,
      "loss": 3.8688,
      "step": 9500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9724623759205894e-05,
      "loss": 3.8282,
      "step": 9600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9721421709894334e-05,
      "loss": 3.8127,
      "step": 9700
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9718219660582774e-05,
      "loss": 3.8398,
      "step": 9800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9715017611271214e-05,
      "loss": 3.8117,
      "step": 9900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.971181556195966e-05,
      "loss": 3.8016,
      "step": 10000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.2313246726989746,
      "eval_runtime": 113.9204,
      "eval_samples_per_second": 87.781,
      "eval_steps_per_second": 5.486,
      "step": 10000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.970861351264809e-05,
      "loss": 3.8189,
      "step": 10100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.970541146333654e-05,
      "loss": 3.846,
      "step": 10200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.970220941402498e-05,
      "loss": 3.7957,
      "step": 10300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.969900736471342e-05,
      "loss": 3.8488,
      "step": 10400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.969580531540186e-05,
      "loss": 3.8132,
      "step": 10500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.96926032660903e-05,
      "loss": 3.8255,
      "step": 10600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.968940121677874e-05,
      "loss": 3.782,
      "step": 10700
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.968619916746718e-05,
      "loss": 3.8192,
      "step": 10800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9682997118155625e-05,
      "loss": 3.8334,
      "step": 10900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9679795068844064e-05,
      "loss": 3.8001,
      "step": 11000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.2163329124450684,
      "eval_runtime": 113.8931,
      "eval_samples_per_second": 87.802,
      "eval_steps_per_second": 5.488,
      "step": 11000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9676593019532504e-05,
      "loss": 3.7799,
      "step": 11100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9673390970220944e-05,
      "loss": 3.7866,
      "step": 11200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9670188920909384e-05,
      "loss": 3.8147,
      "step": 11300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.966698687159782e-05,
      "loss": 3.8189,
      "step": 11400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.966378482228626e-05,
      "loss": 3.7923,
      "step": 11500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.966058277297471e-05,
      "loss": 3.7795,
      "step": 11600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.965738072366315e-05,
      "loss": 3.7717,
      "step": 11700
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.965417867435159e-05,
      "loss": 3.7907,
      "step": 11800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.965097662504003e-05,
      "loss": 3.7784,
      "step": 11900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.964777457572847e-05,
      "loss": 3.7567,
      "step": 12000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.2018871307373047,
      "eval_runtime": 111.9816,
      "eval_samples_per_second": 89.3,
      "eval_steps_per_second": 5.581,
      "step": 12000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.964457252641691e-05,
      "loss": 3.7788,
      "step": 12100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.964137047710535e-05,
      "loss": 3.7813,
      "step": 12200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.963816842779379e-05,
      "loss": 3.7519,
      "step": 12300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.963496637848223e-05,
      "loss": 3.7678,
      "step": 12400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9631764329170674e-05,
      "loss": 3.7844,
      "step": 12500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9628562279859114e-05,
      "loss": 3.7791,
      "step": 12600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9625360230547553e-05,
      "loss": 3.7774,
      "step": 12700
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.962215818123599e-05,
      "loss": 3.7568,
      "step": 12800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.961895613192443e-05,
      "loss": 3.7768,
      "step": 12900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.961575408261287e-05,
      "loss": 3.7695,
      "step": 13000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.191197633743286,
      "eval_runtime": 112.7611,
      "eval_samples_per_second": 88.683,
      "eval_steps_per_second": 5.543,
      "step": 13000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.961255203330131e-05,
      "loss": 3.7667,
      "step": 13100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.960934998398976e-05,
      "loss": 3.7402,
      "step": 13200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.96061479346782e-05,
      "loss": 3.7566,
      "step": 13300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.960294588536664e-05,
      "loss": 3.7437,
      "step": 13400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.959974383605508e-05,
      "loss": 3.731,
      "step": 13500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.959654178674352e-05,
      "loss": 3.7386,
      "step": 13600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.959333973743196e-05,
      "loss": 3.7634,
      "step": 13700
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.95901376881204e-05,
      "loss": 3.7171,
      "step": 13800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.958693563880884e-05,
      "loss": 3.7304,
      "step": 13900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9583733589497284e-05,
      "loss": 3.7579,
      "step": 14000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.179133176803589,
      "eval_runtime": 112.3978,
      "eval_samples_per_second": 88.97,
      "eval_steps_per_second": 5.561,
      "step": 14000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.958053154018572e-05,
      "loss": 3.7283,
      "step": 14100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.957732949087416e-05,
      "loss": 3.7567,
      "step": 14200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.95741274415626e-05,
      "loss": 3.7467,
      "step": 14300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.957092539225104e-05,
      "loss": 3.756,
      "step": 14400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.956772334293948e-05,
      "loss": 3.737,
      "step": 14500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.956452129362792e-05,
      "loss": 3.7222,
      "step": 14600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.956131924431636e-05,
      "loss": 3.7283,
      "step": 14700
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.955811719500481e-05,
      "loss": 3.7412,
      "step": 14800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.955491514569325e-05,
      "loss": 3.7123,
      "step": 14900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.955171309638169e-05,
      "loss": 3.6986,
      "step": 15000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.1683101654052734,
      "eval_runtime": 112.3462,
      "eval_samples_per_second": 89.011,
      "eval_steps_per_second": 5.563,
      "step": 15000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.954851104707013e-05,
      "loss": 3.712,
      "step": 15100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.954530899775857e-05,
      "loss": 3.7121,
      "step": 15200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.954210694844701e-05,
      "loss": 3.7271,
      "step": 15300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.953890489913545e-05,
      "loss": 3.713,
      "step": 15400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9535702849823886e-05,
      "loss": 3.7057,
      "step": 15500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.953250080051233e-05,
      "loss": 3.7023,
      "step": 15600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.952929875120077e-05,
      "loss": 3.7056,
      "step": 15700
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.952609670188921e-05,
      "loss": 3.7157,
      "step": 15800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.952289465257765e-05,
      "loss": 3.6996,
      "step": 15900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.951969260326609e-05,
      "loss": 3.6937,
      "step": 16000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.1550917625427246,
      "eval_runtime": 115.1105,
      "eval_samples_per_second": 86.873,
      "eval_steps_per_second": 5.43,
      "step": 16000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.951649055395453e-05,
      "loss": 3.6901,
      "step": 16100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.951328850464297e-05,
      "loss": 3.6892,
      "step": 16200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.951008645533142e-05,
      "loss": 3.6772,
      "step": 16300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.950688440601986e-05,
      "loss": 3.7105,
      "step": 16400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.95036823567083e-05,
      "loss": 3.6675,
      "step": 16500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.950048030739674e-05,
      "loss": 3.6909,
      "step": 16600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.949727825808518e-05,
      "loss": 3.6867,
      "step": 16700
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.949407620877362e-05,
      "loss": 3.6675,
      "step": 16800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9490874159462056e-05,
      "loss": 3.6859,
      "step": 16900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.94876721101505e-05,
      "loss": 3.6677,
      "step": 17000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.1490609645843506,
      "eval_runtime": 118.6227,
      "eval_samples_per_second": 84.301,
      "eval_steps_per_second": 5.269,
      "step": 17000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9484470060838936e-05,
      "loss": 3.6595,
      "step": 17100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.948126801152738e-05,
      "loss": 3.7092,
      "step": 17200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.947806596221582e-05,
      "loss": 3.6962,
      "step": 17300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.947486391290426e-05,
      "loss": 3.6964,
      "step": 17400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.94716618635927e-05,
      "loss": 3.6524,
      "step": 17500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.946845981428114e-05,
      "loss": 3.6803,
      "step": 17600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.946525776496958e-05,
      "loss": 3.6855,
      "step": 17700
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.946205571565802e-05,
      "loss": 3.682,
      "step": 17800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.945885366634647e-05,
      "loss": 3.6825,
      "step": 17900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.945565161703491e-05,
      "loss": 3.6612,
      "step": 18000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.1355764865875244,
      "eval_runtime": 120.4186,
      "eval_samples_per_second": 83.044,
      "eval_steps_per_second": 5.19,
      "step": 18000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.945244956772335e-05,
      "loss": 3.6786,
      "step": 18100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9449247518411787e-05,
      "loss": 3.6775,
      "step": 18200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9446045469100226e-05,
      "loss": 3.652,
      "step": 18300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9442843419788666e-05,
      "loss": 3.6936,
      "step": 18400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9439641370477106e-05,
      "loss": 3.6561,
      "step": 18500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.943643932116555e-05,
      "loss": 3.6729,
      "step": 18600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9433237271853985e-05,
      "loss": 3.6605,
      "step": 18700
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.943003522254243e-05,
      "loss": 3.6607,
      "step": 18800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.942683317323087e-05,
      "loss": 3.6471,
      "step": 18900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.942363112391931e-05,
      "loss": 3.6503,
      "step": 19000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.1274449825286865,
      "eval_runtime": 115.0891,
      "eval_samples_per_second": 86.889,
      "eval_steps_per_second": 5.431,
      "step": 19000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.942042907460775e-05,
      "loss": 3.6451,
      "step": 19100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.941722702529619e-05,
      "loss": 3.6418,
      "step": 19200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.941402497598464e-05,
      "loss": 3.6293,
      "step": 19300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.941082292667307e-05,
      "loss": 3.6414,
      "step": 19400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.940762087736152e-05,
      "loss": 3.6587,
      "step": 19500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9404418828049956e-05,
      "loss": 3.6407,
      "step": 19600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9401216778738396e-05,
      "loss": 3.639,
      "step": 19700
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9398014729426836e-05,
      "loss": 3.6629,
      "step": 19800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9394812680115276e-05,
      "loss": 3.6377,
      "step": 19900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9391610630803715e-05,
      "loss": 3.639,
      "step": 20000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.1242361068725586,
      "eval_runtime": 111.7854,
      "eval_samples_per_second": 89.457,
      "eval_steps_per_second": 5.591,
      "step": 20000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9388408581492155e-05,
      "loss": 3.6357,
      "step": 20100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.93852065321806e-05,
      "loss": 3.6367,
      "step": 20200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9382004482869035e-05,
      "loss": 3.6445,
      "step": 20300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.937880243355748e-05,
      "loss": 3.6291,
      "step": 20400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.937560038424592e-05,
      "loss": 3.6313,
      "step": 20500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.937239833493436e-05,
      "loss": 3.614,
      "step": 20600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.93691962856228e-05,
      "loss": 3.6371,
      "step": 20700
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.936599423631124e-05,
      "loss": 3.6435,
      "step": 20800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9362792186999687e-05,
      "loss": 3.6484,
      "step": 20900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.935959013768812e-05,
      "loss": 3.6235,
      "step": 21000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.1158366203308105,
      "eval_runtime": 112.1177,
      "eval_samples_per_second": 89.192,
      "eval_steps_per_second": 5.574,
      "step": 21000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9356388088376566e-05,
      "loss": 3.6363,
      "step": 21100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9353186039065006e-05,
      "loss": 3.6116,
      "step": 21200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9349983989753446e-05,
      "loss": 3.6162,
      "step": 21300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9346781940441885e-05,
      "loss": 3.622,
      "step": 21400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9343579891130325e-05,
      "loss": 3.6319,
      "step": 21500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.934037784181877e-05,
      "loss": 3.6063,
      "step": 21600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9337175792507204e-05,
      "loss": 3.6229,
      "step": 21700
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.933397374319565e-05,
      "loss": 3.6049,
      "step": 21800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9330771693884084e-05,
      "loss": 3.6155,
      "step": 21900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.932756964457253e-05,
      "loss": 3.611,
      "step": 22000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.102334499359131,
      "eval_runtime": 112.0966,
      "eval_samples_per_second": 89.209,
      "eval_steps_per_second": 5.576,
      "step": 22000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.932436759526097e-05,
      "loss": 3.5996,
      "step": 22100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.932116554594941e-05,
      "loss": 3.5927,
      "step": 22200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.931796349663785e-05,
      "loss": 3.5905,
      "step": 22300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.931476144732629e-05,
      "loss": 3.589,
      "step": 22400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9311559398014736e-05,
      "loss": 3.5846,
      "step": 22500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.930835734870317e-05,
      "loss": 3.604,
      "step": 22600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9305155299391615e-05,
      "loss": 3.6128,
      "step": 22700
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9301953250080055e-05,
      "loss": 3.5843,
      "step": 22800
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9298751200768495e-05,
      "loss": 3.5861,
      "step": 22900
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9295549151456935e-05,
      "loss": 3.587,
      "step": 23000
    },
    {
      "epoch": 0.01,
      "eval_loss": 3.0996856689453125,
      "eval_runtime": 111.2639,
      "eval_samples_per_second": 89.876,
      "eval_steps_per_second": 5.617,
      "step": 23000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9292347102145374e-05,
      "loss": 3.6078,
      "step": 23100
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.928914505283382e-05,
      "loss": 3.5987,
      "step": 23200
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9285943003522254e-05,
      "loss": 3.5869,
      "step": 23300
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.92827409542107e-05,
      "loss": 3.5936,
      "step": 23400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.927953890489913e-05,
      "loss": 3.6287,
      "step": 23500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.927633685558758e-05,
      "loss": 3.5879,
      "step": 23600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.927313480627602e-05,
      "loss": 3.5948,
      "step": 23700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.926993275696446e-05,
      "loss": 3.5987,
      "step": 23800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9266730707652906e-05,
      "loss": 3.5802,
      "step": 23900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.926352865834134e-05,
      "loss": 3.565,
      "step": 24000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.091219425201416,
      "eval_runtime": 111.2296,
      "eval_samples_per_second": 89.904,
      "eval_steps_per_second": 5.619,
      "step": 24000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9260326609029785e-05,
      "loss": 3.5402,
      "step": 24100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.925712455971822e-05,
      "loss": 3.5733,
      "step": 24200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9253922510406665e-05,
      "loss": 3.5883,
      "step": 24300
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9250720461095105e-05,
      "loss": 3.5779,
      "step": 24400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9247518411783544e-05,
      "loss": 3.5813,
      "step": 24500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9244316362471984e-05,
      "loss": 3.5567,
      "step": 24600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9241114313160424e-05,
      "loss": 3.5451,
      "step": 24700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.923791226384887e-05,
      "loss": 3.5853,
      "step": 24800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.92347102145373e-05,
      "loss": 3.5767,
      "step": 24900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.923150816522575e-05,
      "loss": 3.5686,
      "step": 25000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.08496356010437,
      "eval_runtime": 111.9514,
      "eval_samples_per_second": 89.324,
      "eval_steps_per_second": 5.583,
      "step": 25000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.922830611591418e-05,
      "loss": 3.5741,
      "step": 25100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.922510406660263e-05,
      "loss": 3.571,
      "step": 25200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.922190201729107e-05,
      "loss": 3.5774,
      "step": 25300
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.921869996797951e-05,
      "loss": 3.574,
      "step": 25400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9215497918667955e-05,
      "loss": 3.5717,
      "step": 25500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.921229586935639e-05,
      "loss": 3.5714,
      "step": 25600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9209093820044835e-05,
      "loss": 3.5826,
      "step": 25700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.920589177073327e-05,
      "loss": 3.5695,
      "step": 25800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9202689721421714e-05,
      "loss": 3.5816,
      "step": 25900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9199487672110154e-05,
      "loss": 3.5609,
      "step": 26000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.0786421298980713,
      "eval_runtime": 116.5119,
      "eval_samples_per_second": 85.828,
      "eval_steps_per_second": 5.364,
      "step": 26000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9196285622798594e-05,
      "loss": 3.5748,
      "step": 26100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.919308357348703e-05,
      "loss": 3.5512,
      "step": 26200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.918988152417547e-05,
      "loss": 3.5383,
      "step": 26300
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.918667947486392e-05,
      "loss": 3.5495,
      "step": 26400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.918347742555235e-05,
      "loss": 3.571,
      "step": 26500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.91802753762408e-05,
      "loss": 3.5728,
      "step": 26600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.917707332692923e-05,
      "loss": 3.5376,
      "step": 26700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.917387127761768e-05,
      "loss": 3.5321,
      "step": 26800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.917066922830612e-05,
      "loss": 3.5682,
      "step": 26900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.916746717899456e-05,
      "loss": 3.5726,
      "step": 27000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.069263219833374,
      "eval_runtime": 120.9355,
      "eval_samples_per_second": 82.689,
      "eval_steps_per_second": 5.168,
      "step": 27000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9164265129683005e-05,
      "loss": 3.5637,
      "step": 27100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.916106308037144e-05,
      "loss": 3.5654,
      "step": 27200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9157861031059884e-05,
      "loss": 3.5792,
      "step": 27300
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.915465898174832e-05,
      "loss": 3.5281,
      "step": 27400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9151456932436764e-05,
      "loss": 3.5381,
      "step": 27500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9148254883125197e-05,
      "loss": 3.5653,
      "step": 27600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.914505283381364e-05,
      "loss": 3.5442,
      "step": 27700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.914185078450208e-05,
      "loss": 3.5264,
      "step": 27800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.913864873519052e-05,
      "loss": 3.5338,
      "step": 27900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.913544668587897e-05,
      "loss": 3.5562,
      "step": 28000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.063171148300171,
      "eval_runtime": 115.177,
      "eval_samples_per_second": 86.823,
      "eval_steps_per_second": 5.426,
      "step": 28000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.91322446365674e-05,
      "loss": 3.5503,
      "step": 28100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.912904258725585e-05,
      "loss": 3.5376,
      "step": 28200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.912584053794428e-05,
      "loss": 3.5469,
      "step": 28300
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.912263848863273e-05,
      "loss": 3.5303,
      "step": 28400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.911943643932117e-05,
      "loss": 3.5065,
      "step": 28500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.911623439000961e-05,
      "loss": 3.5448,
      "step": 28600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9113032340698054e-05,
      "loss": 3.5613,
      "step": 28700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.910983029138649e-05,
      "loss": 3.5306,
      "step": 28800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9106628242074933e-05,
      "loss": 3.5455,
      "step": 28900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9103426192763366e-05,
      "loss": 3.5517,
      "step": 29000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.060626268386841,
      "eval_runtime": 114.4729,
      "eval_samples_per_second": 87.357,
      "eval_steps_per_second": 5.46,
      "step": 29000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.910022414345181e-05,
      "loss": 3.5413,
      "step": 29100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.909702209414025e-05,
      "loss": 3.5445,
      "step": 29200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.909382004482869e-05,
      "loss": 3.5381,
      "step": 29300
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.909061799551713e-05,
      "loss": 3.5401,
      "step": 29400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.908741594620557e-05,
      "loss": 3.5359,
      "step": 29500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.908421389689402e-05,
      "loss": 3.4977,
      "step": 29600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.908101184758245e-05,
      "loss": 3.5274,
      "step": 29700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.90778097982709e-05,
      "loss": 3.5224,
      "step": 29800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.907460774895933e-05,
      "loss": 3.524,
      "step": 29900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.907140569964778e-05,
      "loss": 3.5061,
      "step": 30000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.054588794708252,
      "eval_runtime": 113.9658,
      "eval_samples_per_second": 87.746,
      "eval_steps_per_second": 5.484,
      "step": 30000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.906820365033622e-05,
      "loss": 3.532,
      "step": 30100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.906500160102466e-05,
      "loss": 3.535,
      "step": 30200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.90617995517131e-05,
      "loss": 3.4915,
      "step": 30300
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9058597502401536e-05,
      "loss": 3.4985,
      "step": 30400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.905539545308998e-05,
      "loss": 3.5248,
      "step": 30500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9052193403778416e-05,
      "loss": 3.5171,
      "step": 30600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.904899135446686e-05,
      "loss": 3.5375,
      "step": 30700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.90457893051553e-05,
      "loss": 3.5113,
      "step": 30800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.904258725584374e-05,
      "loss": 3.5162,
      "step": 30900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.903938520653218e-05,
      "loss": 3.4836,
      "step": 31000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.0460429191589355,
      "eval_runtime": 112.5861,
      "eval_samples_per_second": 88.821,
      "eval_steps_per_second": 5.551,
      "step": 31000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.903618315722062e-05,
      "loss": 3.5097,
      "step": 31100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.903298110790907e-05,
      "loss": 3.5286,
      "step": 31200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.90297790585975e-05,
      "loss": 3.5063,
      "step": 31300
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.902657700928595e-05,
      "loss": 3.5302,
      "step": 31400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.902337495997439e-05,
      "loss": 3.5458,
      "step": 31500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.902017291066283e-05,
      "loss": 3.526,
      "step": 31600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9016970861351266e-05,
      "loss": 3.5291,
      "step": 31700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9013768812039706e-05,
      "loss": 3.4827,
      "step": 31800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.901056676272815e-05,
      "loss": 3.5302,
      "step": 31900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9007364713416586e-05,
      "loss": 3.5233,
      "step": 32000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.043567657470703,
      "eval_runtime": 113.1378,
      "eval_samples_per_second": 88.388,
      "eval_steps_per_second": 5.524,
      "step": 32000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.900416266410503e-05,
      "loss": 3.5328,
      "step": 32100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9000960614793465e-05,
      "loss": 3.5158,
      "step": 32200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.899775856548191e-05,
      "loss": 3.5021,
      "step": 32300
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.899455651617035e-05,
      "loss": 3.4773,
      "step": 32400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.899135446685879e-05,
      "loss": 3.4845,
      "step": 32500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.898815241754723e-05,
      "loss": 3.513,
      "step": 32600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.898495036823567e-05,
      "loss": 3.5246,
      "step": 32700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.898174831892412e-05,
      "loss": 3.4963,
      "step": 32800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.897854626961255e-05,
      "loss": 3.4865,
      "step": 32900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8975344220300997e-05,
      "loss": 3.5051,
      "step": 33000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.036905527114868,
      "eval_runtime": 111.6621,
      "eval_samples_per_second": 89.556,
      "eval_steps_per_second": 5.597,
      "step": 33000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8972142170989436e-05,
      "loss": 3.4992,
      "step": 33100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8968940121677876e-05,
      "loss": 3.5289,
      "step": 33200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8965738072366316e-05,
      "loss": 3.4976,
      "step": 33300
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8962536023054756e-05,
      "loss": 3.5025,
      "step": 33400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.89593339737432e-05,
      "loss": 3.498,
      "step": 33500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8956131924431635e-05,
      "loss": 3.4899,
      "step": 33600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.895292987512008e-05,
      "loss": 3.5115,
      "step": 33700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.894972782580852e-05,
      "loss": 3.4835,
      "step": 33800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.894652577649696e-05,
      "loss": 3.5037,
      "step": 33900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.89433237271854e-05,
      "loss": 3.506,
      "step": 34000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.0296170711517334,
      "eval_runtime": 113.4976,
      "eval_samples_per_second": 88.108,
      "eval_steps_per_second": 5.507,
      "step": 34000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.894012167787384e-05,
      "loss": 3.4887,
      "step": 34100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.893691962856228e-05,
      "loss": 3.4919,
      "step": 34200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.893371757925072e-05,
      "loss": 3.4917,
      "step": 34300
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8930515529939166e-05,
      "loss": 3.4559,
      "step": 34400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8927313480627606e-05,
      "loss": 3.4908,
      "step": 34500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8924111431316046e-05,
      "loss": 3.4864,
      "step": 34600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8920909382004486e-05,
      "loss": 3.4708,
      "step": 34700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8917707332692925e-05,
      "loss": 3.4903,
      "step": 34800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8914505283381365e-05,
      "loss": 3.4923,
      "step": 34900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8911303234069805e-05,
      "loss": 3.4901,
      "step": 35000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.0275020599365234,
      "eval_runtime": 115.9818,
      "eval_samples_per_second": 86.22,
      "eval_steps_per_second": 5.389,
      "step": 35000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.890810118475825e-05,
      "loss": 3.4833,
      "step": 35100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8904899135446684e-05,
      "loss": 3.4782,
      "step": 35200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.890169708613513e-05,
      "loss": 3.4735,
      "step": 35300
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.889849503682357e-05,
      "loss": 3.4744,
      "step": 35400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.889529298751201e-05,
      "loss": 3.4551,
      "step": 35500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.889209093820045e-05,
      "loss": 3.4871,
      "step": 35600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.888888888888889e-05,
      "loss": 3.4708,
      "step": 35700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.888568683957733e-05,
      "loss": 3.4622,
      "step": 35800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.888248479026577e-05,
      "loss": 3.4917,
      "step": 35900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8879282740954216e-05,
      "loss": 3.4667,
      "step": 36000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.025461435317993,
      "eval_runtime": 118.0931,
      "eval_samples_per_second": 84.679,
      "eval_steps_per_second": 5.292,
      "step": 36000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8876080691642656e-05,
      "loss": 3.4701,
      "step": 36100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8872878642331095e-05,
      "loss": 3.4865,
      "step": 36200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8869676593019535e-05,
      "loss": 3.51,
      "step": 36300
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8866474543707975e-05,
      "loss": 3.4696,
      "step": 36400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8863272494396415e-05,
      "loss": 3.4955,
      "step": 36500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8860070445084854e-05,
      "loss": 3.4673,
      "step": 36600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.88568683957733e-05,
      "loss": 3.4934,
      "step": 36700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.885366634646174e-05,
      "loss": 3.5083,
      "step": 36800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.885046429715018e-05,
      "loss": 3.4792,
      "step": 36900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.884726224783862e-05,
      "loss": 3.4905,
      "step": 37000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.0169453620910645,
      "eval_runtime": 115.9185,
      "eval_samples_per_second": 86.268,
      "eval_steps_per_second": 5.392,
      "step": 37000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.884406019852706e-05,
      "loss": 3.4888,
      "step": 37100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.88408581492155e-05,
      "loss": 3.4544,
      "step": 37200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.883765609990394e-05,
      "loss": 3.4678,
      "step": 37300
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.883445405059238e-05,
      "loss": 3.4713,
      "step": 37400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.883125200128082e-05,
      "loss": 3.4699,
      "step": 37500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8828049951969265e-05,
      "loss": 3.5167,
      "step": 37600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8824847902657705e-05,
      "loss": 3.4814,
      "step": 37700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8821645853346145e-05,
      "loss": 3.4933,
      "step": 37800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8818443804034584e-05,
      "loss": 3.4767,
      "step": 37900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8815241754723024e-05,
      "loss": 3.4947,
      "step": 38000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.0117151737213135,
      "eval_runtime": 113.5335,
      "eval_samples_per_second": 88.08,
      "eval_steps_per_second": 5.505,
      "step": 38000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8812039705411464e-05,
      "loss": 3.4812,
      "step": 38100
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8808837656099904e-05,
      "loss": 3.4553,
      "step": 38200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.880563560678835e-05,
      "loss": 3.4511,
      "step": 38300
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.880243355747679e-05,
      "loss": 3.4881,
      "step": 38400
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.879923150816523e-05,
      "loss": 3.4604,
      "step": 38500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.879602945885367e-05,
      "loss": 3.4633,
      "step": 38600
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.879282740954211e-05,
      "loss": 3.4738,
      "step": 38700
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.878962536023055e-05,
      "loss": 3.5018,
      "step": 38800
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.878642331091899e-05,
      "loss": 3.4876,
      "step": 38900
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.878322126160743e-05,
      "loss": 3.4748,
      "step": 39000
    },
    {
      "epoch": 0.02,
      "eval_loss": 3.010026693344116,
      "eval_runtime": 111.7767,
      "eval_samples_per_second": 89.464,
      "eval_steps_per_second": 5.592,
      "step": 39000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8780019212295875e-05,
      "loss": 3.4694,
      "step": 39100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8776817162984315e-05,
      "loss": 3.4396,
      "step": 39200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8773615113672754e-05,
      "loss": 3.4522,
      "step": 39300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8770413064361194e-05,
      "loss": 3.4425,
      "step": 39400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8767211015049634e-05,
      "loss": 3.4642,
      "step": 39500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8764008965738074e-05,
      "loss": 3.4631,
      "step": 39600
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.876080691642651e-05,
      "loss": 3.45,
      "step": 39700
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.875760486711495e-05,
      "loss": 3.4329,
      "step": 39800
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.87544028178034e-05,
      "loss": 3.4799,
      "step": 39900
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.875120076849184e-05,
      "loss": 3.4373,
      "step": 40000
    },
    {
      "epoch": 0.03,
      "eval_loss": 3.011000394821167,
      "eval_runtime": 111.8221,
      "eval_samples_per_second": 89.428,
      "eval_steps_per_second": 5.589,
      "step": 40000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.874799871918028e-05,
      "loss": 3.4456,
      "step": 40100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.874479666986872e-05,
      "loss": 3.4187,
      "step": 40200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.874159462055716e-05,
      "loss": 3.462,
      "step": 40300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.87383925712456e-05,
      "loss": 3.4461,
      "step": 40400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.873519052193404e-05,
      "loss": 3.4485,
      "step": 40500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.873198847262248e-05,
      "loss": 3.4356,
      "step": 40600
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8728786423310924e-05,
      "loss": 3.4632,
      "step": 40700
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8725584373999364e-05,
      "loss": 3.4406,
      "step": 40800
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8722382324687804e-05,
      "loss": 3.4215,
      "step": 40900
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8719180275376243e-05,
      "loss": 3.4534,
      "step": 41000
    },
    {
      "epoch": 0.03,
      "eval_loss": 3.000332832336426,
      "eval_runtime": 111.1575,
      "eval_samples_per_second": 89.962,
      "eval_steps_per_second": 5.623,
      "step": 41000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.871597822606468e-05,
      "loss": 3.43,
      "step": 41100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.871277617675312e-05,
      "loss": 3.4565,
      "step": 41200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.870957412744156e-05,
      "loss": 3.4512,
      "step": 41300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.870637207813001e-05,
      "loss": 3.4478,
      "step": 41400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.870317002881845e-05,
      "loss": 3.4462,
      "step": 41500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.869996797950689e-05,
      "loss": 3.4254,
      "step": 41600
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.869676593019533e-05,
      "loss": 3.4398,
      "step": 41700
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.869356388088377e-05,
      "loss": 3.4603,
      "step": 41800
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.869036183157221e-05,
      "loss": 3.4382,
      "step": 41900
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.868715978226065e-05,
      "loss": 3.4286,
      "step": 42000
    },
    {
      "epoch": 0.03,
      "eval_loss": 3.0020840167999268,
      "eval_runtime": 112.0954,
      "eval_samples_per_second": 89.21,
      "eval_steps_per_second": 5.576,
      "step": 42000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.868395773294909e-05,
      "loss": 3.4298,
      "step": 42100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.868075568363753e-05,
      "loss": 3.44,
      "step": 42200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8677553634325974e-05,
      "loss": 3.4483,
      "step": 42300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.867435158501441e-05,
      "loss": 3.3978,
      "step": 42400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.867114953570285e-05,
      "loss": 3.4353,
      "step": 42500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.866794748639129e-05,
      "loss": 3.4703,
      "step": 42600
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.866474543707973e-05,
      "loss": 3.487,
      "step": 42700
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.866154338776817e-05,
      "loss": 3.4693,
      "step": 42800
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.865834133845661e-05,
      "loss": 3.4594,
      "step": 42900
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.865513928914506e-05,
      "loss": 3.4389,
      "step": 43000
    },
    {
      "epoch": 0.03,
      "eval_loss": 3.0003559589385986,
      "eval_runtime": 111.5333,
      "eval_samples_per_second": 89.659,
      "eval_steps_per_second": 5.604,
      "step": 43000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.86519372398335e-05,
      "loss": 3.4353,
      "step": 43100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.864873519052194e-05,
      "loss": 3.4354,
      "step": 43200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.864553314121038e-05,
      "loss": 3.4353,
      "step": 43300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.864233109189882e-05,
      "loss": 3.4572,
      "step": 43400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.863912904258726e-05,
      "loss": 3.4454,
      "step": 43500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.86359269932757e-05,
      "loss": 3.4394,
      "step": 43600
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8632724943964144e-05,
      "loss": 3.4489,
      "step": 43700
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8629522894652576e-05,
      "loss": 3.4311,
      "step": 43800
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.862632084534102e-05,
      "loss": 3.401,
      "step": 43900
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.862311879602946e-05,
      "loss": 3.4221,
      "step": 44000
    },
    {
      "epoch": 0.03,
      "eval_loss": 2.9958832263946533,
      "eval_runtime": 114.7893,
      "eval_samples_per_second": 87.116,
      "eval_steps_per_second": 5.445,
      "step": 44000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.86199167467179e-05,
      "loss": 3.4372,
      "step": 44100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.861671469740634e-05,
      "loss": 3.4243,
      "step": 44200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.861351264809478e-05,
      "loss": 3.4234,
      "step": 44300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.861031059878323e-05,
      "loss": 3.4437,
      "step": 44400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.860710854947166e-05,
      "loss": 3.4486,
      "step": 44500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.860390650016011e-05,
      "loss": 3.4615,
      "step": 44600
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.860070445084855e-05,
      "loss": 3.4427,
      "step": 44700
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.859750240153699e-05,
      "loss": 3.4411,
      "step": 44800
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.859430035222543e-05,
      "loss": 3.4425,
      "step": 44900
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.859109830291387e-05,
      "loss": 3.4027,
      "step": 45000
    },
    {
      "epoch": 0.03,
      "eval_loss": 2.9935693740844727,
      "eval_runtime": 118.6277,
      "eval_samples_per_second": 84.297,
      "eval_steps_per_second": 5.269,
      "step": 45000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.858789625360231e-05,
      "loss": 3.4277,
      "step": 45100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8584694204290746e-05,
      "loss": 3.441,
      "step": 45200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.858149215497919e-05,
      "loss": 3.4601,
      "step": 45300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8578290105667626e-05,
      "loss": 3.4241,
      "step": 45400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.857508805635607e-05,
      "loss": 3.4337,
      "step": 45500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.857188600704451e-05,
      "loss": 3.4502,
      "step": 45600
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.856868395773295e-05,
      "loss": 3.4215,
      "step": 45700
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.856548190842139e-05,
      "loss": 3.4082,
      "step": 45800
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.856227985910983e-05,
      "loss": 3.4456,
      "step": 45900
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.855907780979828e-05,
      "loss": 3.418,
      "step": 46000
    },
    {
      "epoch": 0.03,
      "eval_loss": 2.989250898361206,
      "eval_runtime": 119.2728,
      "eval_samples_per_second": 83.841,
      "eval_steps_per_second": 5.24,
      "step": 46000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.855587576048671e-05,
      "loss": 3.4328,
      "step": 46100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.855267371117516e-05,
      "loss": 3.4311,
      "step": 46200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.854947166186359e-05,
      "loss": 3.4273,
      "step": 46300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.854626961255204e-05,
      "loss": 3.3881,
      "step": 46400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8543067563240477e-05,
      "loss": 3.4439,
      "step": 46500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8539865513928916e-05,
      "loss": 3.4236,
      "step": 46600
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.853666346461736e-05,
      "loss": 3.4426,
      "step": 46700
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8533461415305796e-05,
      "loss": 3.4173,
      "step": 46800
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.853025936599424e-05,
      "loss": 3.395,
      "step": 46900
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8527057316682675e-05,
      "loss": 3.4102,
      "step": 47000
    },
    {
      "epoch": 0.03,
      "eval_loss": 2.9878439903259277,
      "eval_runtime": 113.3919,
      "eval_samples_per_second": 88.19,
      "eval_steps_per_second": 5.512,
      "step": 47000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.852385526737112e-05,
      "loss": 3.4532,
      "step": 47100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.852065321805956e-05,
      "loss": 3.4303,
      "step": 47200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8517451168748e-05,
      "loss": 3.4173,
      "step": 47300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.851424911943644e-05,
      "loss": 3.4864,
      "step": 47400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.851104707012488e-05,
      "loss": 3.435,
      "step": 47500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.850784502081333e-05,
      "loss": 3.4145,
      "step": 47600
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.850464297150176e-05,
      "loss": 3.4361,
      "step": 47700
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.850144092219021e-05,
      "loss": 3.4387,
      "step": 47800
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.849823887287864e-05,
      "loss": 3.4165,
      "step": 47900
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8495036823567086e-05,
      "loss": 3.4367,
      "step": 48000
    },
    {
      "epoch": 0.03,
      "eval_loss": 2.9852871894836426,
      "eval_runtime": 112.2045,
      "eval_samples_per_second": 89.123,
      "eval_steps_per_second": 5.57,
      "step": 48000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8491834774255526e-05,
      "loss": 3.3839,
      "step": 48100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8488632724943966e-05,
      "loss": 3.3952,
      "step": 48200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.848543067563241e-05,
      "loss": 3.4375,
      "step": 48300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8482228626320845e-05,
      "loss": 3.4262,
      "step": 48400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.847902657700929e-05,
      "loss": 3.3929,
      "step": 48500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8475824527697725e-05,
      "loss": 3.3818,
      "step": 48600
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.847262247838617e-05,
      "loss": 3.4358,
      "step": 48700
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.846942042907461e-05,
      "loss": 3.4162,
      "step": 48800
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.846621837976305e-05,
      "loss": 3.4493,
      "step": 48900
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.84630163304515e-05,
      "loss": 3.4203,
      "step": 49000
    },
    {
      "epoch": 0.03,
      "eval_loss": 2.9820396900177,
      "eval_runtime": 112.0521,
      "eval_samples_per_second": 89.244,
      "eval_steps_per_second": 5.578,
      "step": 49000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.845981428113993e-05,
      "loss": 3.4204,
      "step": 49100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8456612231828377e-05,
      "loss": 3.4081,
      "step": 49200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.845341018251681e-05,
      "loss": 3.4149,
      "step": 49300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8450208133205256e-05,
      "loss": 3.4217,
      "step": 49400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.844700608389369e-05,
      "loss": 3.4163,
      "step": 49500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8443804034582136e-05,
      "loss": 3.4067,
      "step": 49600
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8440601985270575e-05,
      "loss": 3.3937,
      "step": 49700
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8437399935959015e-05,
      "loss": 3.4203,
      "step": 49800
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.843419788664746e-05,
      "loss": 3.3866,
      "step": 49900
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8430995837335894e-05,
      "loss": 3.4057,
      "step": 50000
    },
    {
      "epoch": 0.03,
      "eval_loss": 2.9746055603027344,
      "eval_runtime": 112.2859,
      "eval_samples_per_second": 89.058,
      "eval_steps_per_second": 5.566,
      "step": 50000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.842779378802434e-05,
      "loss": 3.4036,
      "step": 50100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8424591738712774e-05,
      "loss": 3.3798,
      "step": 50200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.842138968940122e-05,
      "loss": 3.3284,
      "step": 50300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.841818764008966e-05,
      "loss": 3.3603,
      "step": 50400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.84149855907781e-05,
      "loss": 3.3887,
      "step": 50500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8411783541466546e-05,
      "loss": 3.4045,
      "step": 50600
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.840858149215498e-05,
      "loss": 3.4072,
      "step": 50700
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8405379442843426e-05,
      "loss": 3.413,
      "step": 50800
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.840217739353186e-05,
      "loss": 3.4301,
      "step": 50900
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8398975344220305e-05,
      "loss": 3.4005,
      "step": 51000
    },
    {
      "epoch": 0.03,
      "eval_loss": 2.9741649627685547,
      "eval_runtime": 112.1308,
      "eval_samples_per_second": 89.182,
      "eval_steps_per_second": 5.574,
      "step": 51000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.839577329490874e-05,
      "loss": 3.3987,
      "step": 51100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8392571245597185e-05,
      "loss": 3.4013,
      "step": 51200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8389369196285625e-05,
      "loss": 3.4138,
      "step": 51300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8386167146974064e-05,
      "loss": 3.4134,
      "step": 51400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.838296509766251e-05,
      "loss": 3.3756,
      "step": 51500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8379763048350944e-05,
      "loss": 3.4194,
      "step": 51600
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.837656099903939e-05,
      "loss": 3.3715,
      "step": 51700
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.837335894972782e-05,
      "loss": 3.4043,
      "step": 51800
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.837015690041627e-05,
      "loss": 3.3614,
      "step": 51900
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.836695485110471e-05,
      "loss": 3.3793,
      "step": 52000
    },
    {
      "epoch": 0.03,
      "eval_loss": 2.9723241329193115,
      "eval_runtime": 114.896,
      "eval_samples_per_second": 87.035,
      "eval_steps_per_second": 5.44,
      "step": 52000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.836375280179315e-05,
      "loss": 3.3658,
      "step": 52100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8360550752481596e-05,
      "loss": 3.3759,
      "step": 52200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.835734870317003e-05,
      "loss": 3.3789,
      "step": 52300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8354146653858475e-05,
      "loss": 3.3951,
      "step": 52400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.835094460454691e-05,
      "loss": 3.3904,
      "step": 52500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8347742555235355e-05,
      "loss": 3.4012,
      "step": 52600
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.834454050592379e-05,
      "loss": 3.381,
      "step": 52700
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8341338456612234e-05,
      "loss": 3.3695,
      "step": 52800
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8338136407300674e-05,
      "loss": 3.3615,
      "step": 52900
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8334934357989114e-05,
      "loss": 3.3862,
      "step": 53000
    },
    {
      "epoch": 0.03,
      "eval_loss": 2.9677822589874268,
      "eval_runtime": 115.5039,
      "eval_samples_per_second": 86.577,
      "eval_steps_per_second": 5.411,
      "step": 53000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.833173230867756e-05,
      "loss": 3.3834,
      "step": 53100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.832853025936599e-05,
      "loss": 3.3695,
      "step": 53200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.832532821005444e-05,
      "loss": 3.3751,
      "step": 53300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.832212616074287e-05,
      "loss": 3.3845,
      "step": 53400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.831892411143132e-05,
      "loss": 3.3658,
      "step": 53500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.831572206211976e-05,
      "loss": 3.3698,
      "step": 53600
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.83125200128082e-05,
      "loss": 3.3554,
      "step": 53700
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8309317963496645e-05,
      "loss": 3.3771,
      "step": 53800
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.830611591418508e-05,
      "loss": 3.3861,
      "step": 53900
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8302913864873525e-05,
      "loss": 3.4036,
      "step": 54000
    },
    {
      "epoch": 0.03,
      "eval_loss": 2.965113878250122,
      "eval_runtime": 117.2333,
      "eval_samples_per_second": 85.3,
      "eval_steps_per_second": 5.331,
      "step": 54000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.829971181556196e-05,
      "loss": 3.3828,
      "step": 54100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8296509766250404e-05,
      "loss": 3.3893,
      "step": 54200
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8293307716938844e-05,
      "loss": 3.3833,
      "step": 54300
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.8290105667627284e-05,
      "loss": 3.3958,
      "step": 54400
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.828690361831572e-05,
      "loss": 3.4141,
      "step": 54500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.828370156900416e-05,
      "loss": 3.3921,
      "step": 54600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.828049951969261e-05,
      "loss": 3.4038,
      "step": 54700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.827729747038104e-05,
      "loss": 3.3899,
      "step": 54800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.827409542106949e-05,
      "loss": 3.4028,
      "step": 54900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.827089337175792e-05,
      "loss": 3.406,
      "step": 55000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.9611377716064453,
      "eval_runtime": 114.376,
      "eval_samples_per_second": 87.431,
      "eval_steps_per_second": 5.464,
      "step": 55000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.826769132244637e-05,
      "loss": 3.3992,
      "step": 55100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.826448927313481e-05,
      "loss": 3.3877,
      "step": 55200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.826128722382325e-05,
      "loss": 3.3635,
      "step": 55300
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8258085174511695e-05,
      "loss": 3.3928,
      "step": 55400
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.825488312520013e-05,
      "loss": 3.3671,
      "step": 55500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8251681075888574e-05,
      "loss": 3.3428,
      "step": 55600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.824847902657701e-05,
      "loss": 3.3739,
      "step": 55700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8245276977265454e-05,
      "loss": 3.3789,
      "step": 55800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.824207492795389e-05,
      "loss": 3.362,
      "step": 55900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.823887287864233e-05,
      "loss": 3.4003,
      "step": 56000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.962881326675415,
      "eval_runtime": 114.001,
      "eval_samples_per_second": 87.719,
      "eval_steps_per_second": 5.482,
      "step": 56000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.823567082933077e-05,
      "loss": 3.3653,
      "step": 56100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.823246878001921e-05,
      "loss": 3.374,
      "step": 56200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.822926673070766e-05,
      "loss": 3.3788,
      "step": 56300
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.822606468139609e-05,
      "loss": 3.3774,
      "step": 56400
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.822286263208454e-05,
      "loss": 3.3892,
      "step": 56500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.821966058277298e-05,
      "loss": 3.3733,
      "step": 56600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.821645853346142e-05,
      "loss": 3.3841,
      "step": 56700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.821325648414986e-05,
      "loss": 3.3656,
      "step": 56800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.82100544348383e-05,
      "loss": 3.3795,
      "step": 56900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8206852385526744e-05,
      "loss": 3.4189,
      "step": 57000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.955794095993042,
      "eval_runtime": 113.5835,
      "eval_samples_per_second": 88.041,
      "eval_steps_per_second": 5.503,
      "step": 57000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.820365033621518e-05,
      "loss": 3.3953,
      "step": 57100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8200448286903623e-05,
      "loss": 3.3916,
      "step": 57200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8197246237592056e-05,
      "loss": 3.3656,
      "step": 57300
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.81940441882805e-05,
      "loss": 3.4055,
      "step": 57400
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.819084213896894e-05,
      "loss": 3.3966,
      "step": 57500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.818764008965738e-05,
      "loss": 3.3707,
      "step": 57600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.818443804034582e-05,
      "loss": 3.3828,
      "step": 57700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.818123599103426e-05,
      "loss": 3.3897,
      "step": 57800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.817803394172271e-05,
      "loss": 3.4201,
      "step": 57900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.817483189241114e-05,
      "loss": 3.3517,
      "step": 58000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.9614787101745605,
      "eval_runtime": 109.1716,
      "eval_samples_per_second": 91.599,
      "eval_steps_per_second": 5.725,
      "step": 58000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.817162984309959e-05,
      "loss": 3.3595,
      "step": 58100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.816842779378803e-05,
      "loss": 3.3646,
      "step": 58200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.816522574447647e-05,
      "loss": 3.3577,
      "step": 58300
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.816202369516491e-05,
      "loss": 3.3774,
      "step": 58400
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.815882164585335e-05,
      "loss": 3.3648,
      "step": 58500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.815561959654179e-05,
      "loss": 3.3662,
      "step": 58600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8152417547230226e-05,
      "loss": 3.3959,
      "step": 58700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.814921549791867e-05,
      "loss": 3.355,
      "step": 58800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.814601344860711e-05,
      "loss": 3.3781,
      "step": 58900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.814281139929555e-05,
      "loss": 3.3672,
      "step": 59000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.9559738636016846,
      "eval_runtime": 114.6994,
      "eval_samples_per_second": 87.184,
      "eval_steps_per_second": 5.449,
      "step": 59000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.813960934998399e-05,
      "loss": 3.3962,
      "step": 59100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.813640730067243e-05,
      "loss": 3.3935,
      "step": 59200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.813320525136087e-05,
      "loss": 3.3865,
      "step": 59300
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.813000320204931e-05,
      "loss": 3.3756,
      "step": 59400
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.812680115273776e-05,
      "loss": 3.3649,
      "step": 59500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.812359910342619e-05,
      "loss": 3.3355,
      "step": 59600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.812039705411464e-05,
      "loss": 3.366,
      "step": 59700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.811719500480308e-05,
      "loss": 3.3634,
      "step": 59800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.811399295549152e-05,
      "loss": 3.3573,
      "step": 59900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8110790906179956e-05,
      "loss": 3.3585,
      "step": 60000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.950742483139038,
      "eval_runtime": 113.5787,
      "eval_samples_per_second": 88.045,
      "eval_steps_per_second": 5.503,
      "step": 60000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8107588856868396e-05,
      "loss": 3.3618,
      "step": 60100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.810438680755684e-05,
      "loss": 3.3708,
      "step": 60200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8101184758245276e-05,
      "loss": 3.376,
      "step": 60300
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.809798270893372e-05,
      "loss": 3.3543,
      "step": 60400
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.809478065962216e-05,
      "loss": 3.3556,
      "step": 60500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.80915786103106e-05,
      "loss": 3.3937,
      "step": 60600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.808837656099904e-05,
      "loss": 3.3688,
      "step": 60700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.808517451168748e-05,
      "loss": 3.3795,
      "step": 60800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.808197246237592e-05,
      "loss": 3.3672,
      "step": 60900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.807877041306436e-05,
      "loss": 3.3531,
      "step": 61000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.9521257877349854,
      "eval_runtime": 113.403,
      "eval_samples_per_second": 88.181,
      "eval_steps_per_second": 5.511,
      "step": 61000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.807556836375281e-05,
      "loss": 3.3473,
      "step": 61100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.807236631444125e-05,
      "loss": 3.3647,
      "step": 61200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8069164265129687e-05,
      "loss": 3.3604,
      "step": 61300
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8065962215818126e-05,
      "loss": 3.3747,
      "step": 61400
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8062760166506566e-05,
      "loss": 3.3559,
      "step": 61500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8059558117195006e-05,
      "loss": 3.3385,
      "step": 61600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8056356067883446e-05,
      "loss": 3.3759,
      "step": 61700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.805315401857189e-05,
      "loss": 3.3591,
      "step": 61800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8049951969260325e-05,
      "loss": 3.3572,
      "step": 61900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.804674991994877e-05,
      "loss": 3.3495,
      "step": 62000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.9511172771453857,
      "eval_runtime": 113.1132,
      "eval_samples_per_second": 88.407,
      "eval_steps_per_second": 5.525,
      "step": 62000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.804354787063721e-05,
      "loss": 3.3624,
      "step": 62100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.804034582132565e-05,
      "loss": 3.3707,
      "step": 62200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.803714377201409e-05,
      "loss": 3.3549,
      "step": 62300
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.803394172270253e-05,
      "loss": 3.3535,
      "step": 62400
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.803073967339097e-05,
      "loss": 3.3391,
      "step": 62500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.802753762407941e-05,
      "loss": 3.3495,
      "step": 62600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8024335574767856e-05,
      "loss": 3.3363,
      "step": 62700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8021133525456296e-05,
      "loss": 3.3709,
      "step": 62800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8017931476144736e-05,
      "loss": 3.3151,
      "step": 62900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8014729426833176e-05,
      "loss": 3.3441,
      "step": 63000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.943289041519165,
      "eval_runtime": 114.432,
      "eval_samples_per_second": 87.388,
      "eval_steps_per_second": 5.462,
      "step": 63000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8011527377521615e-05,
      "loss": 3.3363,
      "step": 63100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8008325328210055e-05,
      "loss": 3.3411,
      "step": 63200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8005123278898495e-05,
      "loss": 3.3705,
      "step": 63300
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.8001921229586935e-05,
      "loss": 3.3411,
      "step": 63400
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.799871918027538e-05,
      "loss": 3.3427,
      "step": 63500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.799551713096382e-05,
      "loss": 3.3555,
      "step": 63600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.799231508165226e-05,
      "loss": 3.3377,
      "step": 63700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.79891130323407e-05,
      "loss": 3.3442,
      "step": 63800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.798591098302914e-05,
      "loss": 3.3362,
      "step": 63900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.798270893371758e-05,
      "loss": 3.3829,
      "step": 64000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.9423086643218994,
      "eval_runtime": 114.0763,
      "eval_samples_per_second": 87.661,
      "eval_steps_per_second": 5.479,
      "step": 64000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.797950688440602e-05,
      "loss": 3.3487,
      "step": 64100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7976304835094466e-05,
      "loss": 3.3613,
      "step": 64200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7973102785782906e-05,
      "loss": 3.3748,
      "step": 64300
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7969900736471346e-05,
      "loss": 3.3781,
      "step": 64400
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7966698687159785e-05,
      "loss": 3.3511,
      "step": 64500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7963496637848225e-05,
      "loss": 3.392,
      "step": 64600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7960294588536665e-05,
      "loss": 3.3506,
      "step": 64700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7957092539225105e-05,
      "loss": 3.3334,
      "step": 64800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7953890489913544e-05,
      "loss": 3.3527,
      "step": 64900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7950688440601984e-05,
      "loss": 3.3687,
      "step": 65000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.9385969638824463,
      "eval_runtime": 113.7277,
      "eval_samples_per_second": 87.929,
      "eval_steps_per_second": 5.496,
      "step": 65000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.794748639129043e-05,
      "loss": 3.3728,
      "step": 65100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.794428434197887e-05,
      "loss": 3.3475,
      "step": 65200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.794108229266731e-05,
      "loss": 3.3459,
      "step": 65300
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.793788024335575e-05,
      "loss": 3.3634,
      "step": 65400
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.793467819404419e-05,
      "loss": 3.3373,
      "step": 65500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.793147614473263e-05,
      "loss": 3.364,
      "step": 65600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.792827409542107e-05,
      "loss": 3.3487,
      "step": 65700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7925072046109515e-05,
      "loss": 3.3626,
      "step": 65800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7921869996797955e-05,
      "loss": 3.3631,
      "step": 65900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7918667947486395e-05,
      "loss": 3.3231,
      "step": 66000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.941983699798584,
      "eval_runtime": 114.0811,
      "eval_samples_per_second": 87.657,
      "eval_steps_per_second": 5.479,
      "step": 66000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7915465898174835e-05,
      "loss": 3.3359,
      "step": 66100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7912263848863274e-05,
      "loss": 3.3511,
      "step": 66200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7909061799551714e-05,
      "loss": 3.3578,
      "step": 66300
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7905859750240154e-05,
      "loss": 3.343,
      "step": 66400
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.79026577009286e-05,
      "loss": 3.3267,
      "step": 66500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7899455651617033e-05,
      "loss": 3.3273,
      "step": 66600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.789625360230548e-05,
      "loss": 3.2954,
      "step": 66700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.789305155299392e-05,
      "loss": 3.3239,
      "step": 66800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.788984950368236e-05,
      "loss": 3.3595,
      "step": 66900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.78866474543708e-05,
      "loss": 3.3442,
      "step": 67000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.940295696258545,
      "eval_runtime": 112.6951,
      "eval_samples_per_second": 88.735,
      "eval_steps_per_second": 5.546,
      "step": 67000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.788344540505924e-05,
      "loss": 3.3327,
      "step": 67100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.788024335574768e-05,
      "loss": 3.3752,
      "step": 67200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.787704130643612e-05,
      "loss": 3.3486,
      "step": 67300
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7873839257124565e-05,
      "loss": 3.3365,
      "step": 67400
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7870637207813005e-05,
      "loss": 3.3402,
      "step": 67500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7867435158501444e-05,
      "loss": 3.3261,
      "step": 67600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7864233109189884e-05,
      "loss": 3.3462,
      "step": 67700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7861031059878324e-05,
      "loss": 3.3477,
      "step": 67800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7857829010566764e-05,
      "loss": 3.3409,
      "step": 67900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.78546269612552e-05,
      "loss": 3.3356,
      "step": 68000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.9393150806427,
      "eval_runtime": 112.9366,
      "eval_samples_per_second": 88.545,
      "eval_steps_per_second": 5.534,
      "step": 68000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.785142491194365e-05,
      "loss": 3.3288,
      "step": 68100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.784822286263208e-05,
      "loss": 3.3387,
      "step": 68200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.784502081332053e-05,
      "loss": 3.3559,
      "step": 68300
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.784181876400897e-05,
      "loss": 3.3275,
      "step": 68400
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.783861671469741e-05,
      "loss": 3.3125,
      "step": 68500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.783541466538585e-05,
      "loss": 3.3363,
      "step": 68600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.783221261607429e-05,
      "loss": 3.3517,
      "step": 68700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7829010566762735e-05,
      "loss": 3.3276,
      "step": 68800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.782580851745117e-05,
      "loss": 3.3415,
      "step": 68900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7822606468139614e-05,
      "loss": 3.3605,
      "step": 69000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.9317407608032227,
      "eval_runtime": 110.4017,
      "eval_samples_per_second": 90.578,
      "eval_steps_per_second": 5.661,
      "step": 69000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7819404418828054e-05,
      "loss": 3.3639,
      "step": 69100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7816202369516494e-05,
      "loss": 3.348,
      "step": 69200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.7813000320204933e-05,
      "loss": 3.341,
      "step": 69300
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.780979827089337e-05,
      "loss": 3.3419,
      "step": 69400
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.780659622158181e-05,
      "loss": 3.3249,
      "step": 69500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.780339417227025e-05,
      "loss": 3.3521,
      "step": 69600
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.78001921229587e-05,
      "loss": 3.3566,
      "step": 69700
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.779699007364713e-05,
      "loss": 3.3429,
      "step": 69800
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.779378802433558e-05,
      "loss": 3.3602,
      "step": 69900
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.779058597502402e-05,
      "loss": 3.3496,
      "step": 70000
    },
    {
      "epoch": 0.04,
      "eval_loss": 2.9317429065704346,
      "eval_runtime": 113.4558,
      "eval_samples_per_second": 88.14,
      "eval_steps_per_second": 5.509,
      "step": 70000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.778738392571246e-05,
      "loss": 3.3422,
      "step": 70100
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.77841818764009e-05,
      "loss": 3.3468,
      "step": 70200
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.778097982708934e-05,
      "loss": 3.3243,
      "step": 70300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7777777777777784e-05,
      "loss": 3.3382,
      "step": 70400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.777457572846622e-05,
      "loss": 3.3588,
      "step": 70500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7771373679154664e-05,
      "loss": 3.3207,
      "step": 70600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.77681716298431e-05,
      "loss": 3.3764,
      "step": 70700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.776496958053154e-05,
      "loss": 3.348,
      "step": 70800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.776176753121998e-05,
      "loss": 3.3503,
      "step": 70900
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.775856548190842e-05,
      "loss": 3.3318,
      "step": 71000
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.929677963256836,
      "eval_runtime": 113.8909,
      "eval_samples_per_second": 87.803,
      "eval_steps_per_second": 5.488,
      "step": 71000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.775536343259687e-05,
      "loss": 3.349,
      "step": 71100
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.77521613832853e-05,
      "loss": 3.3439,
      "step": 71200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.774895933397375e-05,
      "loss": 3.3472,
      "step": 71300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.774575728466218e-05,
      "loss": 3.3199,
      "step": 71400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.774255523535063e-05,
      "loss": 3.3473,
      "step": 71500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.773935318603907e-05,
      "loss": 3.3453,
      "step": 71600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.773615113672751e-05,
      "loss": 3.3516,
      "step": 71700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.773294908741595e-05,
      "loss": 3.3279,
      "step": 71800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.772974703810439e-05,
      "loss": 3.3388,
      "step": 71900
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7726544988792834e-05,
      "loss": 3.3497,
      "step": 72000
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.9286766052246094,
      "eval_runtime": 110.7088,
      "eval_samples_per_second": 90.327,
      "eval_steps_per_second": 5.645,
      "step": 72000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7723342939481266e-05,
      "loss": 3.364,
      "step": 72100
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.772014089016971e-05,
      "loss": 3.3578,
      "step": 72200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.771693884085815e-05,
      "loss": 3.3163,
      "step": 72300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.771373679154659e-05,
      "loss": 3.3159,
      "step": 72400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.771053474223503e-05,
      "loss": 3.3299,
      "step": 72500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.770733269292347e-05,
      "loss": 3.3219,
      "step": 72600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.770413064361192e-05,
      "loss": 3.3267,
      "step": 72700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.770092859430035e-05,
      "loss": 3.3321,
      "step": 72800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.76977265449888e-05,
      "loss": 3.3393,
      "step": 72900
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.769452449567723e-05,
      "loss": 3.3036,
      "step": 73000
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.9253289699554443,
      "eval_runtime": 109.8567,
      "eval_samples_per_second": 91.028,
      "eval_steps_per_second": 5.689,
      "step": 73000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.769132244636568e-05,
      "loss": 3.3258,
      "step": 73100
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.768812039705412e-05,
      "loss": 3.332,
      "step": 73200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.768491834774256e-05,
      "loss": 3.3228,
      "step": 73300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7681716298431003e-05,
      "loss": 3.3041,
      "step": 73400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7678514249119436e-05,
      "loss": 3.3379,
      "step": 73500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.767531219980788e-05,
      "loss": 3.3589,
      "step": 73600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7672110150496316e-05,
      "loss": 3.3124,
      "step": 73700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.766890810118476e-05,
      "loss": 3.3262,
      "step": 73800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.76657060518732e-05,
      "loss": 3.3524,
      "step": 73900
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.766250400256164e-05,
      "loss": 3.3469,
      "step": 74000
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.9242265224456787,
      "eval_runtime": 115.2923,
      "eval_samples_per_second": 86.736,
      "eval_steps_per_second": 5.421,
      "step": 74000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.765930195325009e-05,
      "loss": 3.3098,
      "step": 74100
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.765609990393852e-05,
      "loss": 3.2969,
      "step": 74200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.765289785462697e-05,
      "loss": 3.3023,
      "step": 74300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.76496958053154e-05,
      "loss": 3.324,
      "step": 74400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.764649375600385e-05,
      "loss": 3.3011,
      "step": 74500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.764329170669228e-05,
      "loss": 3.3471,
      "step": 74600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.764008965738073e-05,
      "loss": 3.3219,
      "step": 74700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7636887608069167e-05,
      "loss": 3.3004,
      "step": 74800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7633685558757606e-05,
      "loss": 3.3128,
      "step": 74900
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.763048350944605e-05,
      "loss": 3.3127,
      "step": 75000
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.9225473403930664,
      "eval_runtime": 114.4528,
      "eval_samples_per_second": 87.372,
      "eval_steps_per_second": 5.461,
      "step": 75000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7627281460134486e-05,
      "loss": 3.3123,
      "step": 75100
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.762407941082293e-05,
      "loss": 3.3242,
      "step": 75200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7620877361511365e-05,
      "loss": 3.2998,
      "step": 75300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.761767531219981e-05,
      "loss": 3.319,
      "step": 75400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.761447326288825e-05,
      "loss": 3.3141,
      "step": 75500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.761127121357669e-05,
      "loss": 3.2889,
      "step": 75600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.760806916426514e-05,
      "loss": 3.3368,
      "step": 75700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.760486711495357e-05,
      "loss": 3.3121,
      "step": 75800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.760166506564202e-05,
      "loss": 3.3335,
      "step": 75900
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.759846301633045e-05,
      "loss": 3.3294,
      "step": 76000
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.9196457862854004,
      "eval_runtime": 116.4758,
      "eval_samples_per_second": 85.855,
      "eval_steps_per_second": 5.366,
      "step": 76000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.75952609670189e-05,
      "loss": 3.3185,
      "step": 76100
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.759205891770733e-05,
      "loss": 3.3028,
      "step": 76200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7588856868395776e-05,
      "loss": 3.3203,
      "step": 76300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7585654819084216e-05,
      "loss": 3.3196,
      "step": 76400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7582452769772656e-05,
      "loss": 3.3233,
      "step": 76500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.75792507204611e-05,
      "loss": 3.3138,
      "step": 76600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7576048671149535e-05,
      "loss": 3.312,
      "step": 76700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.757284662183798e-05,
      "loss": 3.3235,
      "step": 76800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7569644572526415e-05,
      "loss": 3.3074,
      "step": 76900
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.756644252321486e-05,
      "loss": 3.3362,
      "step": 77000
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.9194910526275635,
      "eval_runtime": 113.4173,
      "eval_samples_per_second": 88.17,
      "eval_steps_per_second": 5.511,
      "step": 77000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.75632404739033e-05,
      "loss": 3.3148,
      "step": 77100
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.756003842459174e-05,
      "loss": 3.3121,
      "step": 77200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.755683637528019e-05,
      "loss": 3.3189,
      "step": 77300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.755363432596862e-05,
      "loss": 3.2913,
      "step": 77400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7550432276657067e-05,
      "loss": 3.3373,
      "step": 77500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.75472302273455e-05,
      "loss": 3.3152,
      "step": 77600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7544028178033946e-05,
      "loss": 3.3264,
      "step": 77700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.754082612872238e-05,
      "loss": 3.2967,
      "step": 77800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7537624079410826e-05,
      "loss": 3.3376,
      "step": 77900
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7534422030099265e-05,
      "loss": 3.3189,
      "step": 78000
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.9132628440856934,
      "eval_runtime": 114.1957,
      "eval_samples_per_second": 87.569,
      "eval_steps_per_second": 5.473,
      "step": 78000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7531219980787705e-05,
      "loss": 3.3131,
      "step": 78100
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.752801793147615e-05,
      "loss": 3.3446,
      "step": 78200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7524815882164584e-05,
      "loss": 3.3103,
      "step": 78300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.752161383285303e-05,
      "loss": 3.3229,
      "step": 78400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7518411783541464e-05,
      "loss": 3.3043,
      "step": 78500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.751520973422991e-05,
      "loss": 3.3037,
      "step": 78600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.751200768491835e-05,
      "loss": 3.2955,
      "step": 78700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.750880563560679e-05,
      "loss": 3.3212,
      "step": 78800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7505603586295236e-05,
      "loss": 3.3422,
      "step": 78900
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.750240153698367e-05,
      "loss": 3.2832,
      "step": 79000
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.9174118041992188,
      "eval_runtime": 113.7366,
      "eval_samples_per_second": 87.922,
      "eval_steps_per_second": 5.495,
      "step": 79000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7499199487672116e-05,
      "loss": 3.3321,
      "step": 79100
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.749599743836055e-05,
      "loss": 3.3103,
      "step": 79200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7492795389048995e-05,
      "loss": 3.3184,
      "step": 79300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.748959333973743e-05,
      "loss": 3.3133,
      "step": 79400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7486391290425875e-05,
      "loss": 3.3412,
      "step": 79500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7483189241114315e-05,
      "loss": 3.3162,
      "step": 79600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7479987191802754e-05,
      "loss": 3.3042,
      "step": 79700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.74767851424912e-05,
      "loss": 3.3038,
      "step": 79800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7473583093179634e-05,
      "loss": 3.2998,
      "step": 79900
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.747038104386808e-05,
      "loss": 3.3119,
      "step": 80000
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.913302421569824,
      "eval_runtime": 113.5592,
      "eval_samples_per_second": 88.06,
      "eval_steps_per_second": 5.504,
      "step": 80000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.746717899455651e-05,
      "loss": 3.3055,
      "step": 80100
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.746397694524496e-05,
      "loss": 3.3439,
      "step": 80200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.74607748959334e-05,
      "loss": 3.3131,
      "step": 80300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.745757284662184e-05,
      "loss": 3.2762,
      "step": 80400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7454370797310286e-05,
      "loss": 3.2882,
      "step": 80500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.745116874799872e-05,
      "loss": 3.3114,
      "step": 80600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7447966698687165e-05,
      "loss": 3.3005,
      "step": 80700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.74447646493756e-05,
      "loss": 3.285,
      "step": 80800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7441562600064045e-05,
      "loss": 3.3012,
      "step": 80900
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7438360550752485e-05,
      "loss": 3.3364,
      "step": 81000
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.9145188331604004,
      "eval_runtime": 114.1795,
      "eval_samples_per_second": 87.581,
      "eval_steps_per_second": 5.474,
      "step": 81000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7435158501440924e-05,
      "loss": 3.3137,
      "step": 81100
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7431956452129364e-05,
      "loss": 3.3042,
      "step": 81200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7428754402817804e-05,
      "loss": 3.3002,
      "step": 81300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.742555235350625e-05,
      "loss": 3.3089,
      "step": 81400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.742235030419468e-05,
      "loss": 3.2972,
      "step": 81500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.741914825488313e-05,
      "loss": 3.3199,
      "step": 81600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.741594620557156e-05,
      "loss": 3.3,
      "step": 81700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.741274415626001e-05,
      "loss": 3.2668,
      "step": 81800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.740954210694845e-05,
      "loss": 3.3037,
      "step": 81900
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.740634005763689e-05,
      "loss": 3.3037,
      "step": 82000
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.9120373725891113,
      "eval_runtime": 114.0118,
      "eval_samples_per_second": 87.71,
      "eval_steps_per_second": 5.482,
      "step": 82000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.740313800832533e-05,
      "loss": 3.3074,
      "step": 82100
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.739993595901377e-05,
      "loss": 3.2996,
      "step": 82200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7396733909702215e-05,
      "loss": 3.2892,
      "step": 82300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.739353186039065e-05,
      "loss": 3.2801,
      "step": 82400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7390329811079094e-05,
      "loss": 3.2917,
      "step": 82500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7387127761767534e-05,
      "loss": 3.3005,
      "step": 82600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7383925712455974e-05,
      "loss": 3.2811,
      "step": 82700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.738072366314441e-05,
      "loss": 3.2974,
      "step": 82800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.737752161383285e-05,
      "loss": 3.2885,
      "step": 82900
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.73743195645213e-05,
      "loss": 3.317,
      "step": 83000
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.9070682525634766,
      "eval_runtime": 114.6988,
      "eval_samples_per_second": 87.185,
      "eval_steps_per_second": 5.449,
      "step": 83000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.737111751520973e-05,
      "loss": 3.3206,
      "step": 83100
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.736791546589818e-05,
      "loss": 3.2875,
      "step": 83200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.736471341658662e-05,
      "loss": 3.302,
      "step": 83300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.736151136727506e-05,
      "loss": 3.3044,
      "step": 83400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.73583093179635e-05,
      "loss": 3.3058,
      "step": 83500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.735510726865194e-05,
      "loss": 3.3364,
      "step": 83600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.735190521934038e-05,
      "loss": 3.2881,
      "step": 83700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.734870317002882e-05,
      "loss": 3.3213,
      "step": 83800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7345501120717264e-05,
      "loss": 3.2879,
      "step": 83900
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7342299071405704e-05,
      "loss": 3.306,
      "step": 84000
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.9069085121154785,
      "eval_runtime": 110.659,
      "eval_samples_per_second": 90.368,
      "eval_steps_per_second": 5.648,
      "step": 84000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7339097022094144e-05,
      "loss": 3.2886,
      "step": 84100
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.733589497278258e-05,
      "loss": 3.285,
      "step": 84200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.733269292347102e-05,
      "loss": 3.3065,
      "step": 84300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.732949087415946e-05,
      "loss": 3.2414,
      "step": 84400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.73262888248479e-05,
      "loss": 3.2939,
      "step": 84500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.732308677553635e-05,
      "loss": 3.2827,
      "step": 84600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.731988472622478e-05,
      "loss": 3.3155,
      "step": 84700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.731668267691323e-05,
      "loss": 3.3061,
      "step": 84800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.731348062760167e-05,
      "loss": 3.2805,
      "step": 84900
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.731027857829011e-05,
      "loss": 3.2946,
      "step": 85000
    },
    {
      "epoch": 0.05,
      "eval_loss": 2.9062347412109375,
      "eval_runtime": 111.048,
      "eval_samples_per_second": 90.051,
      "eval_steps_per_second": 5.628,
      "step": 85000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.730707652897855e-05,
      "loss": 3.2944,
      "step": 85100
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.730387447966699e-05,
      "loss": 3.2898,
      "step": 85200
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.730067243035543e-05,
      "loss": 3.3019,
      "step": 85300
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.729747038104387e-05,
      "loss": 3.2916,
      "step": 85400
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.7294268331732313e-05,
      "loss": 3.2693,
      "step": 85500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.729106628242075e-05,
      "loss": 3.2934,
      "step": 85600
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.728786423310919e-05,
      "loss": 3.2843,
      "step": 85700
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.728466218379763e-05,
      "loss": 3.2663,
      "step": 85800
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.728146013448607e-05,
      "loss": 3.2791,
      "step": 85900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.727825808517451e-05,
      "loss": 3.2575,
      "step": 86000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.90578293800354,
      "eval_runtime": 110.2228,
      "eval_samples_per_second": 90.725,
      "eval_steps_per_second": 5.67,
      "step": 86000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.727505603586295e-05,
      "loss": 3.3096,
      "step": 86100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.72718539865514e-05,
      "loss": 3.2863,
      "step": 86200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.726865193723984e-05,
      "loss": 3.3037,
      "step": 86300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.726544988792828e-05,
      "loss": 3.2867,
      "step": 86400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.726224783861672e-05,
      "loss": 3.2935,
      "step": 86500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.725904578930516e-05,
      "loss": 3.2586,
      "step": 86600
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.72558437399936e-05,
      "loss": 3.2894,
      "step": 86700
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.725264169068204e-05,
      "loss": 3.2794,
      "step": 86800
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7249439641370477e-05,
      "loss": 3.289,
      "step": 86900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7246237592058916e-05,
      "loss": 3.291,
      "step": 87000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.9041857719421387,
      "eval_runtime": 114.2568,
      "eval_samples_per_second": 87.522,
      "eval_steps_per_second": 5.47,
      "step": 87000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.724303554274736e-05,
      "loss": 3.2967,
      "step": 87100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.72398334934358e-05,
      "loss": 3.2819,
      "step": 87200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.723663144412424e-05,
      "loss": 3.3228,
      "step": 87300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.723342939481268e-05,
      "loss": 3.2895,
      "step": 87400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.723022734550112e-05,
      "loss": 3.3139,
      "step": 87500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.722702529618956e-05,
      "loss": 3.2649,
      "step": 87600
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7223823246878e-05,
      "loss": 3.295,
      "step": 87700
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.722062119756645e-05,
      "loss": 3.2755,
      "step": 87800
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.721741914825489e-05,
      "loss": 3.2862,
      "step": 87900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.721421709894333e-05,
      "loss": 3.2782,
      "step": 88000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.8991758823394775,
      "eval_runtime": 115.0653,
      "eval_samples_per_second": 86.907,
      "eval_steps_per_second": 5.432,
      "step": 88000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.721101504963177e-05,
      "loss": 3.2899,
      "step": 88100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.720781300032021e-05,
      "loss": 3.3034,
      "step": 88200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7204610951008646e-05,
      "loss": 3.291,
      "step": 88300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7201408901697086e-05,
      "loss": 3.2844,
      "step": 88400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7198206852385526e-05,
      "loss": 3.2891,
      "step": 88500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.719500480307397e-05,
      "loss": 3.278,
      "step": 88600
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.719180275376241e-05,
      "loss": 3.2492,
      "step": 88700
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.718860070445085e-05,
      "loss": 3.2939,
      "step": 88800
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.718539865513929e-05,
      "loss": 3.2794,
      "step": 88900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.718219660582773e-05,
      "loss": 3.2839,
      "step": 89000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.9015512466430664,
      "eval_runtime": 114.1082,
      "eval_samples_per_second": 87.636,
      "eval_steps_per_second": 5.477,
      "step": 89000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.717899455651617e-05,
      "loss": 3.2579,
      "step": 89100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.717579250720461e-05,
      "loss": 3.2932,
      "step": 89200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.717259045789305e-05,
      "loss": 3.2797,
      "step": 89300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.71693884085815e-05,
      "loss": 3.2723,
      "step": 89400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.716618635926994e-05,
      "loss": 3.2636,
      "step": 89500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7162984309958377e-05,
      "loss": 3.2737,
      "step": 89600
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7159782260646816e-05,
      "loss": 3.2702,
      "step": 89700
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7156580211335256e-05,
      "loss": 3.2644,
      "step": 89800
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7153378162023696e-05,
      "loss": 3.2924,
      "step": 89900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7150176112712136e-05,
      "loss": 3.2835,
      "step": 90000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.8985331058502197,
      "eval_runtime": 113.7553,
      "eval_samples_per_second": 87.908,
      "eval_steps_per_second": 5.494,
      "step": 90000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7146974063400575e-05,
      "loss": 3.2862,
      "step": 90100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.714377201408902e-05,
      "loss": 3.2696,
      "step": 90200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.714056996477746e-05,
      "loss": 3.2834,
      "step": 90300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.71373679154659e-05,
      "loss": 3.2729,
      "step": 90400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.713416586615434e-05,
      "loss": 3.265,
      "step": 90500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.713096381684278e-05,
      "loss": 3.2495,
      "step": 90600
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.712776176753122e-05,
      "loss": 3.2376,
      "step": 90700
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.712455971821966e-05,
      "loss": 3.2816,
      "step": 90800
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.712135766890811e-05,
      "loss": 3.2417,
      "step": 90900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7118155619596546e-05,
      "loss": 3.3108,
      "step": 91000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.8956241607666016,
      "eval_runtime": 109.767,
      "eval_samples_per_second": 91.102,
      "eval_steps_per_second": 5.694,
      "step": 91000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7114953570284986e-05,
      "loss": 3.3062,
      "step": 91100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7111751520973426e-05,
      "loss": 3.2835,
      "step": 91200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7108549471661866e-05,
      "loss": 3.2711,
      "step": 91300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7105347422350305e-05,
      "loss": 3.2925,
      "step": 91400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7102145373038745e-05,
      "loss": 3.2489,
      "step": 91500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.709894332372719e-05,
      "loss": 3.2674,
      "step": 91600
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7095741274415625e-05,
      "loss": 3.2597,
      "step": 91700
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.709253922510407e-05,
      "loss": 3.2754,
      "step": 91800
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.708933717579251e-05,
      "loss": 3.2979,
      "step": 91900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.708613512648095e-05,
      "loss": 3.282,
      "step": 92000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.8957114219665527,
      "eval_runtime": 109.2696,
      "eval_samples_per_second": 91.517,
      "eval_steps_per_second": 5.72,
      "step": 92000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.708293307716939e-05,
      "loss": 3.2764,
      "step": 92100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.707973102785783e-05,
      "loss": 3.286,
      "step": 92200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.707652897854627e-05,
      "loss": 3.2663,
      "step": 92300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.707332692923471e-05,
      "loss": 3.2792,
      "step": 92400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7070124879923156e-05,
      "loss": 3.2732,
      "step": 92500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7066922830611596e-05,
      "loss": 3.2882,
      "step": 92600
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7063720781300036e-05,
      "loss": 3.2879,
      "step": 92700
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7060518731988475e-05,
      "loss": 3.2758,
      "step": 92800
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7057316682676915e-05,
      "loss": 3.2613,
      "step": 92900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7054114633365355e-05,
      "loss": 3.2581,
      "step": 93000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.893002986907959,
      "eval_runtime": 114.4301,
      "eval_samples_per_second": 87.39,
      "eval_steps_per_second": 5.462,
      "step": 93000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7050912584053795e-05,
      "loss": 3.2496,
      "step": 93100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.704771053474224e-05,
      "loss": 3.2936,
      "step": 93200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7044508485430674e-05,
      "loss": 3.25,
      "step": 93300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.704130643611912e-05,
      "loss": 3.2418,
      "step": 93400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.703810438680756e-05,
      "loss": 3.2727,
      "step": 93500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7034902337496e-05,
      "loss": 3.2664,
      "step": 93600
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.703170028818444e-05,
      "loss": 3.2823,
      "step": 93700
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.702849823887288e-05,
      "loss": 3.2592,
      "step": 93800
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7025296189561326e-05,
      "loss": 3.2725,
      "step": 93900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.702209414024976e-05,
      "loss": 3.285,
      "step": 94000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.893744468688965,
      "eval_runtime": 114.875,
      "eval_samples_per_second": 87.051,
      "eval_steps_per_second": 5.441,
      "step": 94000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7018892090938205e-05,
      "loss": 3.2549,
      "step": 94100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7015690041626645e-05,
      "loss": 3.2741,
      "step": 94200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7012487992315085e-05,
      "loss": 3.2614,
      "step": 94300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7009285943003525e-05,
      "loss": 3.2675,
      "step": 94400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7006083893691964e-05,
      "loss": 3.2945,
      "step": 94500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.7002881844380404e-05,
      "loss": 3.276,
      "step": 94600
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6999679795068844e-05,
      "loss": 3.2651,
      "step": 94700
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.699647774575729e-05,
      "loss": 3.2772,
      "step": 94800
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6993275696445723e-05,
      "loss": 3.2866,
      "step": 94900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.699007364713417e-05,
      "loss": 3.2739,
      "step": 95000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.8948404788970947,
      "eval_runtime": 111.2183,
      "eval_samples_per_second": 89.913,
      "eval_steps_per_second": 5.62,
      "step": 95000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.698687159782261e-05,
      "loss": 3.2856,
      "step": 95100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.698366954851105e-05,
      "loss": 3.2705,
      "step": 95200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.698046749919949e-05,
      "loss": 3.262,
      "step": 95300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.697726544988793e-05,
      "loss": 3.256,
      "step": 95400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6974063400576375e-05,
      "loss": 3.2831,
      "step": 95500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.697086135126481e-05,
      "loss": 3.2791,
      "step": 95600
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6967659301953255e-05,
      "loss": 3.2325,
      "step": 95700
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6964457252641695e-05,
      "loss": 3.2702,
      "step": 95800
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6961255203330134e-05,
      "loss": 3.2577,
      "step": 95900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6958053154018574e-05,
      "loss": 3.2769,
      "step": 96000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.889333963394165,
      "eval_runtime": 116.7422,
      "eval_samples_per_second": 85.659,
      "eval_steps_per_second": 5.354,
      "step": 96000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6954851104707014e-05,
      "loss": 3.274,
      "step": 96100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.695164905539546e-05,
      "loss": 3.2774,
      "step": 96200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.694844700608389e-05,
      "loss": 3.2736,
      "step": 96300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.694524495677234e-05,
      "loss": 3.259,
      "step": 96400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.694204290746077e-05,
      "loss": 3.2723,
      "step": 96500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.693884085814922e-05,
      "loss": 3.2726,
      "step": 96600
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.693563880883766e-05,
      "loss": 3.2714,
      "step": 96700
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.69324367595261e-05,
      "loss": 3.2879,
      "step": 96800
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.692923471021454e-05,
      "loss": 3.2749,
      "step": 96900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.692603266090298e-05,
      "loss": 3.2547,
      "step": 97000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.8881773948669434,
      "eval_runtime": 117.8935,
      "eval_samples_per_second": 84.822,
      "eval_steps_per_second": 5.301,
      "step": 97000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6922830611591425e-05,
      "loss": 3.3071,
      "step": 97100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.691962856227986e-05,
      "loss": 3.2796,
      "step": 97200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6916426512968304e-05,
      "loss": 3.2751,
      "step": 97300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6913224463656744e-05,
      "loss": 3.2849,
      "step": 97400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6910022414345184e-05,
      "loss": 3.2669,
      "step": 97500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6906820365033623e-05,
      "loss": 3.2734,
      "step": 97600
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.690361831572206e-05,
      "loss": 3.2715,
      "step": 97700
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.690041626641051e-05,
      "loss": 3.2664,
      "step": 97800
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.689721421709894e-05,
      "loss": 3.282,
      "step": 97900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.689401216778739e-05,
      "loss": 3.2555,
      "step": 98000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.889669179916382,
      "eval_runtime": 110.6475,
      "eval_samples_per_second": 90.377,
      "eval_steps_per_second": 5.649,
      "step": 98000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.689081011847582e-05,
      "loss": 3.2731,
      "step": 98100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.688760806916427e-05,
      "loss": 3.292,
      "step": 98200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.688440601985271e-05,
      "loss": 3.2899,
      "step": 98300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.688120397054115e-05,
      "loss": 3.2614,
      "step": 98400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6878001921229595e-05,
      "loss": 3.2635,
      "step": 98500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.687479987191803e-05,
      "loss": 3.2639,
      "step": 98600
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6871597822606474e-05,
      "loss": 3.2741,
      "step": 98700
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.686839577329491e-05,
      "loss": 3.2685,
      "step": 98800
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6865193723983354e-05,
      "loss": 3.2667,
      "step": 98900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.686199167467179e-05,
      "loss": 3.2732,
      "step": 99000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.888998508453369,
      "eval_runtime": 116.1073,
      "eval_samples_per_second": 86.127,
      "eval_steps_per_second": 5.383,
      "step": 99000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.685878962536023e-05,
      "loss": 3.2835,
      "step": 99100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.685558757604867e-05,
      "loss": 3.281,
      "step": 99200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.685238552673711e-05,
      "loss": 3.2796,
      "step": 99300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.684918347742556e-05,
      "loss": 3.2526,
      "step": 99400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.684598142811399e-05,
      "loss": 3.2389,
      "step": 99500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.684277937880244e-05,
      "loss": 3.2678,
      "step": 99600
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.683957732949087e-05,
      "loss": 3.2722,
      "step": 99700
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.683637528017932e-05,
      "loss": 3.2444,
      "step": 99800
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.683317323086776e-05,
      "loss": 3.2667,
      "step": 99900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.68299711815562e-05,
      "loss": 3.2821,
      "step": 100000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.8874640464782715,
      "eval_runtime": 110.2537,
      "eval_samples_per_second": 90.7,
      "eval_steps_per_second": 5.669,
      "step": 100000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6826769132244644e-05,
      "loss": 3.2409,
      "step": 100100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.682356708293308e-05,
      "loss": 3.2701,
      "step": 100200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6820365033621523e-05,
      "loss": 3.2611,
      "step": 100300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.6817162984309956e-05,
      "loss": 3.2521,
      "step": 100400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.68139609349984e-05,
      "loss": 3.2723,
      "step": 100500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.681075888568684e-05,
      "loss": 3.2633,
      "step": 100600
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.680755683637528e-05,
      "loss": 3.2751,
      "step": 100700
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.680435478706372e-05,
      "loss": 3.2416,
      "step": 100800
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.680115273775216e-05,
      "loss": 3.271,
      "step": 100900
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.679795068844061e-05,
      "loss": 3.2421,
      "step": 101000
    },
    {
      "epoch": 0.06,
      "eval_loss": 2.8844504356384277,
      "eval_runtime": 110.3871,
      "eval_samples_per_second": 90.59,
      "eval_steps_per_second": 5.662,
      "step": 101000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.679474863912904e-05,
      "loss": 3.2568,
      "step": 101100
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.679154658981749e-05,
      "loss": 3.2503,
      "step": 101200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.678834454050592e-05,
      "loss": 3.2433,
      "step": 101300
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.678514249119437e-05,
      "loss": 3.2726,
      "step": 101400
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.678194044188281e-05,
      "loss": 3.2492,
      "step": 101500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.677873839257125e-05,
      "loss": 3.2449,
      "step": 101600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6775536343259693e-05,
      "loss": 3.2622,
      "step": 101700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6772334293948126e-05,
      "loss": 3.2576,
      "step": 101800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.676913224463657e-05,
      "loss": 3.2295,
      "step": 101900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6765930195325006e-05,
      "loss": 3.2443,
      "step": 102000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.8830251693725586,
      "eval_runtime": 115.1917,
      "eval_samples_per_second": 86.812,
      "eval_steps_per_second": 5.426,
      "step": 102000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.676272814601345e-05,
      "loss": 3.279,
      "step": 102100
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.675952609670189e-05,
      "loss": 3.2413,
      "step": 102200
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.675632404739033e-05,
      "loss": 3.2445,
      "step": 102300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.675312199807877e-05,
      "loss": 3.2484,
      "step": 102400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.674991994876721e-05,
      "loss": 3.2372,
      "step": 102500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.674671789945566e-05,
      "loss": 3.2749,
      "step": 102600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.674351585014409e-05,
      "loss": 3.2418,
      "step": 102700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.674031380083254e-05,
      "loss": 3.2457,
      "step": 102800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.673711175152097e-05,
      "loss": 3.2555,
      "step": 102900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.673390970220942e-05,
      "loss": 3.2542,
      "step": 103000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.8825299739837646,
      "eval_runtime": 110.4931,
      "eval_samples_per_second": 90.503,
      "eval_steps_per_second": 5.656,
      "step": 103000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6730707652897857e-05,
      "loss": 3.2725,
      "step": 103100
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6727505603586296e-05,
      "loss": 3.2286,
      "step": 103200
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.672430355427474e-05,
      "loss": 3.2447,
      "step": 103300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6721101504963176e-05,
      "loss": 3.2447,
      "step": 103400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.671789945565162e-05,
      "loss": 3.2352,
      "step": 103500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6714697406340055e-05,
      "loss": 3.2511,
      "step": 103600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.67114953570285e-05,
      "loss": 3.243,
      "step": 103700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.670829330771694e-05,
      "loss": 3.2461,
      "step": 103800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.670509125840538e-05,
      "loss": 3.268,
      "step": 103900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.670188920909382e-05,
      "loss": 3.2163,
      "step": 104000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.882269859313965,
      "eval_runtime": 115.6117,
      "eval_samples_per_second": 86.496,
      "eval_steps_per_second": 5.406,
      "step": 104000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.669868715978226e-05,
      "loss": 3.2674,
      "step": 104100
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.669548511047071e-05,
      "loss": 3.243,
      "step": 104200
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.669228306115914e-05,
      "loss": 3.2436,
      "step": 104300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.668908101184759e-05,
      "loss": 3.2344,
      "step": 104400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.668587896253602e-05,
      "loss": 3.2574,
      "step": 104500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6682676913224466e-05,
      "loss": 3.2576,
      "step": 104600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6679474863912906e-05,
      "loss": 3.2605,
      "step": 104700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6676272814601346e-05,
      "loss": 3.2451,
      "step": 104800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.667307076528979e-05,
      "loss": 3.2272,
      "step": 104900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6669868715978225e-05,
      "loss": 3.228,
      "step": 105000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.8781888484954834,
      "eval_runtime": 112.9704,
      "eval_samples_per_second": 88.519,
      "eval_steps_per_second": 5.532,
      "step": 105000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.666666666666667e-05,
      "loss": 3.2398,
      "step": 105100
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6663464617355105e-05,
      "loss": 3.2633,
      "step": 105200
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.666026256804355e-05,
      "loss": 3.2202,
      "step": 105300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.665706051873199e-05,
      "loss": 3.256,
      "step": 105400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.665385846942043e-05,
      "loss": 3.253,
      "step": 105500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.665065642010887e-05,
      "loss": 3.213,
      "step": 105600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.664745437079731e-05,
      "loss": 3.2511,
      "step": 105700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6644252321485757e-05,
      "loss": 3.244,
      "step": 105800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.664105027217419e-05,
      "loss": 3.2342,
      "step": 105900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6637848222862636e-05,
      "loss": 3.2564,
      "step": 106000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.8760592937469482,
      "eval_runtime": 118.3044,
      "eval_samples_per_second": 84.528,
      "eval_steps_per_second": 5.283,
      "step": 106000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6634646173551076e-05,
      "loss": 3.2519,
      "step": 106100
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6631444124239516e-05,
      "loss": 3.2529,
      "step": 106200
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6628242074927955e-05,
      "loss": 3.2687,
      "step": 106300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6625040025616395e-05,
      "loss": 3.2559,
      "step": 106400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.662183797630484e-05,
      "loss": 3.2501,
      "step": 106500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6618635926993274e-05,
      "loss": 3.2459,
      "step": 106600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.661543387768172e-05,
      "loss": 3.2756,
      "step": 106700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6612231828370154e-05,
      "loss": 3.2495,
      "step": 106800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.66090297790586e-05,
      "loss": 3.2844,
      "step": 106900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.660582772974704e-05,
      "loss": 3.2354,
      "step": 107000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.8758106231689453,
      "eval_runtime": 118.827,
      "eval_samples_per_second": 84.156,
      "eval_steps_per_second": 5.26,
      "step": 107000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.660262568043548e-05,
      "loss": 3.2838,
      "step": 107100
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.659942363112392e-05,
      "loss": 3.2366,
      "step": 107200
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.659622158181236e-05,
      "loss": 3.2689,
      "step": 107300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6593019532500806e-05,
      "loss": 3.243,
      "step": 107400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.658981748318924e-05,
      "loss": 3.2527,
      "step": 107500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6586615433877685e-05,
      "loss": 3.2338,
      "step": 107600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6583413384566125e-05,
      "loss": 3.2316,
      "step": 107700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6580211335254565e-05,
      "loss": 3.2245,
      "step": 107800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6577009285943005e-05,
      "loss": 3.2468,
      "step": 107900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6573807236631444e-05,
      "loss": 3.2511,
      "step": 108000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.875925302505493,
      "eval_runtime": 111.6523,
      "eval_samples_per_second": 89.564,
      "eval_steps_per_second": 5.598,
      "step": 108000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.657060518731989e-05,
      "loss": 3.2369,
      "step": 108100
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6567403138008324e-05,
      "loss": 3.2371,
      "step": 108200
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.656420108869677e-05,
      "loss": 3.2518,
      "step": 108300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.656099903938521e-05,
      "loss": 3.2597,
      "step": 108400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.655779699007365e-05,
      "loss": 3.2555,
      "step": 108500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.655459494076209e-05,
      "loss": 3.2599,
      "step": 108600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.655139289145053e-05,
      "loss": 3.2637,
      "step": 108700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.654819084213897e-05,
      "loss": 3.2355,
      "step": 108800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.654498879282741e-05,
      "loss": 3.2328,
      "step": 108900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6541786743515855e-05,
      "loss": 3.2455,
      "step": 109000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.8770925998687744,
      "eval_runtime": 116.2885,
      "eval_samples_per_second": 85.993,
      "eval_steps_per_second": 5.375,
      "step": 109000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.653858469420429e-05,
      "loss": 3.231,
      "step": 109100
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6535382644892735e-05,
      "loss": 3.2595,
      "step": 109200
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6532180595581175e-05,
      "loss": 3.2393,
      "step": 109300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6528978546269614e-05,
      "loss": 3.26,
      "step": 109400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6525776496958054e-05,
      "loss": 3.2549,
      "step": 109500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6522574447646494e-05,
      "loss": 3.2339,
      "step": 109600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.651937239833494e-05,
      "loss": 3.2178,
      "step": 109700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.651617034902337e-05,
      "loss": 3.2631,
      "step": 109800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.651296829971182e-05,
      "loss": 3.2868,
      "step": 109900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.650976625040026e-05,
      "loss": 3.2635,
      "step": 110000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.8764114379882812,
      "eval_runtime": 110.6577,
      "eval_samples_per_second": 90.369,
      "eval_steps_per_second": 5.648,
      "step": 110000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.65065642010887e-05,
      "loss": 3.2676,
      "step": 110100
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.650336215177714e-05,
      "loss": 3.2284,
      "step": 110200
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.650016010246558e-05,
      "loss": 3.2462,
      "step": 110300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.649695805315402e-05,
      "loss": 3.2418,
      "step": 110400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.649375600384246e-05,
      "loss": 3.2434,
      "step": 110500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6490553954530905e-05,
      "loss": 3.2566,
      "step": 110600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6487351905219344e-05,
      "loss": 3.2226,
      "step": 110700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6484149855907784e-05,
      "loss": 3.2306,
      "step": 110800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6480947806596224e-05,
      "loss": 3.2814,
      "step": 110900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6477745757284664e-05,
      "loss": 3.2363,
      "step": 111000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.8719120025634766,
      "eval_runtime": 115.0943,
      "eval_samples_per_second": 86.885,
      "eval_steps_per_second": 5.43,
      "step": 111000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.64745437079731e-05,
      "loss": 3.2505,
      "step": 111100
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.647134165866154e-05,
      "loss": 3.2092,
      "step": 111200
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.646813960934999e-05,
      "loss": 3.2458,
      "step": 111300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.646493756003843e-05,
      "loss": 3.2358,
      "step": 111400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.646173551072687e-05,
      "loss": 3.2538,
      "step": 111500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.645853346141531e-05,
      "loss": 3.2541,
      "step": 111600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.645533141210375e-05,
      "loss": 3.2529,
      "step": 111700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.645212936279219e-05,
      "loss": 3.2572,
      "step": 111800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.644892731348063e-05,
      "loss": 3.2687,
      "step": 111900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.644572526416907e-05,
      "loss": 3.2489,
      "step": 112000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.868739366531372,
      "eval_runtime": 110.5182,
      "eval_samples_per_second": 90.483,
      "eval_steps_per_second": 5.655,
      "step": 112000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.644252321485751e-05,
      "loss": 3.2506,
      "step": 112100
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6439321165545954e-05,
      "loss": 3.2428,
      "step": 112200
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6436119116234394e-05,
      "loss": 3.2647,
      "step": 112300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6432917066922834e-05,
      "loss": 3.235,
      "step": 112400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.642971501761127e-05,
      "loss": 3.2114,
      "step": 112500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.642651296829971e-05,
      "loss": 3.2246,
      "step": 112600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.642331091898815e-05,
      "loss": 3.2388,
      "step": 112700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.642010886967659e-05,
      "loss": 3.247,
      "step": 112800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.641690682036504e-05,
      "loss": 3.2438,
      "step": 112900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.641370477105348e-05,
      "loss": 3.2316,
      "step": 113000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.8713855743408203,
      "eval_runtime": 110.3239,
      "eval_samples_per_second": 90.642,
      "eval_steps_per_second": 5.665,
      "step": 113000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.641050272174192e-05,
      "loss": 3.2748,
      "step": 113100
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.640730067243036e-05,
      "loss": 3.244,
      "step": 113200
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.64040986231188e-05,
      "loss": 3.2707,
      "step": 113300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.640089657380724e-05,
      "loss": 3.253,
      "step": 113400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.639769452449568e-05,
      "loss": 3.216,
      "step": 113500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.639449247518412e-05,
      "loss": 3.2189,
      "step": 113600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6391290425872564e-05,
      "loss": 3.2285,
      "step": 113700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6388088376561003e-05,
      "loss": 3.2202,
      "step": 113800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.638488632724944e-05,
      "loss": 3.2338,
      "step": 113900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.638168427793788e-05,
      "loss": 3.2277,
      "step": 114000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.8686187267303467,
      "eval_runtime": 116.1535,
      "eval_samples_per_second": 86.093,
      "eval_steps_per_second": 5.381,
      "step": 114000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.637848222862632e-05,
      "loss": 3.2452,
      "step": 114100
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.637528017931476e-05,
      "loss": 3.2431,
      "step": 114200
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.63720781300032e-05,
      "loss": 3.244,
      "step": 114300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.636887608069164e-05,
      "loss": 3.2434,
      "step": 114400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.636567403138009e-05,
      "loss": 3.2164,
      "step": 114500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.636247198206853e-05,
      "loss": 3.2539,
      "step": 114600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.635926993275697e-05,
      "loss": 3.2167,
      "step": 114700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.635606788344541e-05,
      "loss": 3.23,
      "step": 114800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.635286583413385e-05,
      "loss": 3.2352,
      "step": 114900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.634966378482229e-05,
      "loss": 3.2509,
      "step": 115000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.8686671257019043,
      "eval_runtime": 110.9229,
      "eval_samples_per_second": 90.153,
      "eval_steps_per_second": 5.635,
      "step": 115000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.634646173551073e-05,
      "loss": 3.2303,
      "step": 115100
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6343259686199167e-05,
      "loss": 3.2193,
      "step": 115200
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.634005763688761e-05,
      "loss": 3.21,
      "step": 115300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.633685558757605e-05,
      "loss": 3.2271,
      "step": 115400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.633365353826449e-05,
      "loss": 3.2483,
      "step": 115500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.633045148895293e-05,
      "loss": 3.249,
      "step": 115600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.632724943964137e-05,
      "loss": 3.274,
      "step": 115700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.632404739032981e-05,
      "loss": 3.2382,
      "step": 115800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.632084534101825e-05,
      "loss": 3.2127,
      "step": 115900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.63176432917067e-05,
      "loss": 3.2593,
      "step": 116000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.868096351623535,
      "eval_runtime": 117.3799,
      "eval_samples_per_second": 85.193,
      "eval_steps_per_second": 5.325,
      "step": 116000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.631444124239514e-05,
      "loss": 3.2546,
      "step": 116100
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.631123919308358e-05,
      "loss": 3.2368,
      "step": 116200
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.630803714377202e-05,
      "loss": 3.2642,
      "step": 116300
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.630483509446046e-05,
      "loss": 3.2451,
      "step": 116400
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.63016330451489e-05,
      "loss": 3.2491,
      "step": 116500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6298430995837336e-05,
      "loss": 3.2258,
      "step": 116600
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6295228946525776e-05,
      "loss": 3.2381,
      "step": 116700
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.6292026897214216e-05,
      "loss": 3.2468,
      "step": 116800
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.628882484790266e-05,
      "loss": 3.2418,
      "step": 116900
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.62856227985911e-05,
      "loss": 3.2307,
      "step": 117000
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.86719012260437,
      "eval_runtime": 110.6159,
      "eval_samples_per_second": 90.403,
      "eval_steps_per_second": 5.65,
      "step": 117000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.628242074927954e-05,
      "loss": 3.2428,
      "step": 117100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.627921869996798e-05,
      "loss": 3.2094,
      "step": 117200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.627601665065642e-05,
      "loss": 3.2151,
      "step": 117300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.627281460134486e-05,
      "loss": 3.2165,
      "step": 117400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.62696125520333e-05,
      "loss": 3.2584,
      "step": 117500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.626641050272175e-05,
      "loss": 3.2394,
      "step": 117600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.626320845341019e-05,
      "loss": 3.2436,
      "step": 117700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.626000640409863e-05,
      "loss": 3.2345,
      "step": 117800
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6256804354787067e-05,
      "loss": 3.2221,
      "step": 117900
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6253602305475506e-05,
      "loss": 3.2396,
      "step": 118000
    },
    {
      "epoch": 0.08,
      "eval_loss": 2.8640897274017334,
      "eval_runtime": 110.5337,
      "eval_samples_per_second": 90.47,
      "eval_steps_per_second": 5.654,
      "step": 118000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6250400256163946e-05,
      "loss": 3.2359,
      "step": 118100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6247198206852386e-05,
      "loss": 3.2293,
      "step": 118200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.624399615754083e-05,
      "loss": 3.2235,
      "step": 118300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6240794108229265e-05,
      "loss": 3.2187,
      "step": 118400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.623759205891771e-05,
      "loss": 3.2149,
      "step": 118500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.623439000960615e-05,
      "loss": 3.2697,
      "step": 118600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.623118796029459e-05,
      "loss": 3.2378,
      "step": 118700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.622798591098303e-05,
      "loss": 3.2274,
      "step": 118800
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.622478386167147e-05,
      "loss": 3.244,
      "step": 118900
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.622158181235991e-05,
      "loss": 3.2339,
      "step": 119000
    },
    {
      "epoch": 0.08,
      "eval_loss": 2.8630216121673584,
      "eval_runtime": 117.662,
      "eval_samples_per_second": 84.989,
      "eval_steps_per_second": 5.312,
      "step": 119000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.621837976304835e-05,
      "loss": 3.2355,
      "step": 119100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.62151777137368e-05,
      "loss": 3.2565,
      "step": 119200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6211975664425236e-05,
      "loss": 3.2413,
      "step": 119300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6208773615113676e-05,
      "loss": 3.2281,
      "step": 119400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6205571565802116e-05,
      "loss": 3.1887,
      "step": 119500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6202369516490556e-05,
      "loss": 3.2357,
      "step": 119600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6199167467178995e-05,
      "loss": 3.1899,
      "step": 119700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6195965417867435e-05,
      "loss": 3.2346,
      "step": 119800
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.619276336855588e-05,
      "loss": 3.1854,
      "step": 119900
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6189561319244315e-05,
      "loss": 3.1965,
      "step": 120000
    },
    {
      "epoch": 0.08,
      "eval_loss": 2.8603975772857666,
      "eval_runtime": 110.7087,
      "eval_samples_per_second": 90.327,
      "eval_steps_per_second": 5.645,
      "step": 120000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.618635926993276e-05,
      "loss": 3.2035,
      "step": 120100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.61831572206212e-05,
      "loss": 3.2221,
      "step": 120200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.617995517130964e-05,
      "loss": 3.2239,
      "step": 120300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.617675312199808e-05,
      "loss": 3.2309,
      "step": 120400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.617355107268652e-05,
      "loss": 3.2065,
      "step": 120500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.617034902337497e-05,
      "loss": 3.2439,
      "step": 120600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.61671469740634e-05,
      "loss": 3.2255,
      "step": 120700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6163944924751846e-05,
      "loss": 3.2068,
      "step": 120800
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6160742875440286e-05,
      "loss": 3.2107,
      "step": 120900
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6157540826128726e-05,
      "loss": 3.2139,
      "step": 121000
    },
    {
      "epoch": 0.08,
      "eval_loss": 2.8581159114837646,
      "eval_runtime": 116.2071,
      "eval_samples_per_second": 86.053,
      "eval_steps_per_second": 5.378,
      "step": 121000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6154338776817165e-05,
      "loss": 3.2341,
      "step": 121100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6151136727505605e-05,
      "loss": 3.2224,
      "step": 121200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.614793467819405e-05,
      "loss": 3.2178,
      "step": 121300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6144732628882485e-05,
      "loss": 3.2081,
      "step": 121400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.614153057957093e-05,
      "loss": 3.2335,
      "step": 121500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6138328530259364e-05,
      "loss": 3.2179,
      "step": 121600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.613512648094781e-05,
      "loss": 3.1973,
      "step": 121700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.613192443163625e-05,
      "loss": 3.2338,
      "step": 121800
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.612872238232469e-05,
      "loss": 3.2168,
      "step": 121900
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.612552033301313e-05,
      "loss": 3.2205,
      "step": 122000
    },
    {
      "epoch": 0.08,
      "eval_loss": 2.859097480773926,
      "eval_runtime": 115.0065,
      "eval_samples_per_second": 86.952,
      "eval_steps_per_second": 5.434,
      "step": 122000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.612231828370157e-05,
      "loss": 3.2107,
      "step": 122100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6119116234390016e-05,
      "loss": 3.2199,
      "step": 122200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.611591418507845e-05,
      "loss": 3.2004,
      "step": 122300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6112712135766895e-05,
      "loss": 3.2338,
      "step": 122400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6109510086455335e-05,
      "loss": 3.2115,
      "step": 122500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6106308037143775e-05,
      "loss": 3.2144,
      "step": 122600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6103105987832215e-05,
      "loss": 3.212,
      "step": 122700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6099903938520654e-05,
      "loss": 3.2438,
      "step": 122800
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.60967018892091e-05,
      "loss": 3.192,
      "step": 122900
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6093499839897534e-05,
      "loss": 3.2347,
      "step": 123000
    },
    {
      "epoch": 0.08,
      "eval_loss": 2.857823133468628,
      "eval_runtime": 109.7492,
      "eval_samples_per_second": 91.117,
      "eval_steps_per_second": 5.695,
      "step": 123000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.609029779058598e-05,
      "loss": 3.1988,
      "step": 123100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6087095741274413e-05,
      "loss": 3.2081,
      "step": 123200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.608389369196286e-05,
      "loss": 3.1983,
      "step": 123300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.60806916426513e-05,
      "loss": 3.2171,
      "step": 123400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.607748959333974e-05,
      "loss": 3.2275,
      "step": 123500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6074287544028186e-05,
      "loss": 3.2148,
      "step": 123600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.607108549471662e-05,
      "loss": 3.2129,
      "step": 123700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6067883445405065e-05,
      "loss": 3.2377,
      "step": 123800
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.60646813960935e-05,
      "loss": 3.2374,
      "step": 123900
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6061479346781945e-05,
      "loss": 3.2134,
      "step": 124000
    },
    {
      "epoch": 0.08,
      "eval_loss": 2.859022378921509,
      "eval_runtime": 116.4686,
      "eval_samples_per_second": 85.86,
      "eval_steps_per_second": 5.366,
      "step": 124000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6058277297470385e-05,
      "loss": 3.2094,
      "step": 124100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6055075248158824e-05,
      "loss": 3.2288,
      "step": 124200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6051873198847264e-05,
      "loss": 3.2409,
      "step": 124300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6048671149535704e-05,
      "loss": 3.2062,
      "step": 124400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.604546910022415e-05,
      "loss": 3.2223,
      "step": 124500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.604226705091258e-05,
      "loss": 3.215,
      "step": 124600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.603906500160103e-05,
      "loss": 3.2201,
      "step": 124700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.603586295228946e-05,
      "loss": 3.2103,
      "step": 124800
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.603266090297791e-05,
      "loss": 3.2236,
      "step": 124900
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.602945885366635e-05,
      "loss": 3.231,
      "step": 125000
    },
    {
      "epoch": 0.08,
      "eval_loss": 2.855792284011841,
      "eval_runtime": 117.8897,
      "eval_samples_per_second": 84.825,
      "eval_steps_per_second": 5.302,
      "step": 125000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.602625680435479e-05,
      "loss": 3.181,
      "step": 125100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6023054755043235e-05,
      "loss": 3.213,
      "step": 125200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.601985270573167e-05,
      "loss": 3.2188,
      "step": 125300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6016650656420115e-05,
      "loss": 3.2282,
      "step": 125400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.601344860710855e-05,
      "loss": 3.2157,
      "step": 125500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6010246557796994e-05,
      "loss": 3.2278,
      "step": 125600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6007044508485434e-05,
      "loss": 3.2051,
      "step": 125700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6003842459173874e-05,
      "loss": 3.2418,
      "step": 125800
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.6000640409862313e-05,
      "loss": 3.2377,
      "step": 125900
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.599743836055075e-05,
      "loss": 3.2294,
      "step": 126000
    },
    {
      "epoch": 0.08,
      "eval_loss": 2.8540749549865723,
      "eval_runtime": 112.0664,
      "eval_samples_per_second": 89.233,
      "eval_steps_per_second": 5.577,
      "step": 126000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.59942363112392e-05,
      "loss": 3.2049,
      "step": 126100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.599103426192763e-05,
      "loss": 3.2181,
      "step": 126200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.598783221261608e-05,
      "loss": 3.2281,
      "step": 126300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.598463016330451e-05,
      "loss": 3.2076,
      "step": 126400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.598142811399296e-05,
      "loss": 3.1958,
      "step": 126500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.59782260646814e-05,
      "loss": 3.1946,
      "step": 126600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.597502401536984e-05,
      "loss": 3.2253,
      "step": 126700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.5971821966058285e-05,
      "loss": 3.1804,
      "step": 126800
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.596861991674672e-05,
      "loss": 3.2132,
      "step": 126900
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.5965417867435164e-05,
      "loss": 3.2136,
      "step": 127000
    },
    {
      "epoch": 0.08,
      "eval_loss": 2.8525290489196777,
      "eval_runtime": 110.8205,
      "eval_samples_per_second": 90.236,
      "eval_steps_per_second": 5.64,
      "step": 127000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.59622158181236e-05,
      "loss": 3.2276,
      "step": 127100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.5959013768812044e-05,
      "loss": 3.2192,
      "step": 127200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.595581171950048e-05,
      "loss": 3.2228,
      "step": 127300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.595260967018892e-05,
      "loss": 3.2226,
      "step": 127400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.594940762087736e-05,
      "loss": 3.2305,
      "step": 127500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.59462055715658e-05,
      "loss": 3.2162,
      "step": 127600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.594300352225425e-05,
      "loss": 3.2018,
      "step": 127700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.593980147294268e-05,
      "loss": 3.2308,
      "step": 127800
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.593659942363113e-05,
      "loss": 3.2095,
      "step": 127900
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.593339737431956e-05,
      "loss": 3.2392,
      "step": 128000
    },
    {
      "epoch": 0.08,
      "eval_loss": 2.8503148555755615,
      "eval_runtime": 116.9375,
      "eval_samples_per_second": 85.516,
      "eval_steps_per_second": 5.345,
      "step": 128000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.593019532500801e-05,
      "loss": 3.1838,
      "step": 128100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.592699327569645e-05,
      "loss": 3.2159,
      "step": 128200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.592379122638489e-05,
      "loss": 3.2015,
      "step": 128300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.5920589177073334e-05,
      "loss": 3.1867,
      "step": 128400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.591738712776177e-05,
      "loss": 3.2163,
      "step": 128500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.5914185078450213e-05,
      "loss": 3.2064,
      "step": 128600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.5910983029138646e-05,
      "loss": 3.2021,
      "step": 128700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.590778097982709e-05,
      "loss": 3.2112,
      "step": 128800
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.590457893051553e-05,
      "loss": 3.2066,
      "step": 128900
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.590137688120397e-05,
      "loss": 3.2074,
      "step": 129000
    },
    {
      "epoch": 0.08,
      "eval_loss": 2.8537380695343018,
      "eval_runtime": 110.8721,
      "eval_samples_per_second": 90.194,
      "eval_steps_per_second": 5.637,
      "step": 129000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.589817483189241e-05,
      "loss": 3.2275,
      "step": 129100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.589497278258085e-05,
      "loss": 3.198,
      "step": 129200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.58917707332693e-05,
      "loss": 3.1916,
      "step": 129300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.588856868395773e-05,
      "loss": 3.2277,
      "step": 129400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.588536663464618e-05,
      "loss": 3.2397,
      "step": 129500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.588216458533461e-05,
      "loss": 3.2097,
      "step": 129600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.587896253602306e-05,
      "loss": 3.1943,
      "step": 129700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.58757604867115e-05,
      "loss": 3.2218,
      "step": 129800
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.587255843739994e-05,
      "loss": 3.215,
      "step": 129900
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.586935638808838e-05,
      "loss": 3.2202,
      "step": 130000
    },
    {
      "epoch": 0.08,
      "eval_loss": 2.8496246337890625,
      "eval_runtime": 111.0201,
      "eval_samples_per_second": 90.074,
      "eval_steps_per_second": 5.63,
      "step": 130000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.5866154338776816e-05,
      "loss": 3.2165,
      "step": 130100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.586295228946526e-05,
      "loss": 3.2144,
      "step": 130200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.5859750240153696e-05,
      "loss": 3.2133,
      "step": 130300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.585654819084214e-05,
      "loss": 3.2122,
      "step": 130400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.585334614153058e-05,
      "loss": 3.1994,
      "step": 130500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.585014409221902e-05,
      "loss": 3.1922,
      "step": 130600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.584694204290746e-05,
      "loss": 3.2214,
      "step": 130700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.58437399935959e-05,
      "loss": 3.1833,
      "step": 130800
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.584053794428435e-05,
      "loss": 3.2119,
      "step": 130900
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.583733589497278e-05,
      "loss": 3.2116,
      "step": 131000
    },
    {
      "epoch": 0.08,
      "eval_loss": 2.848942279815674,
      "eval_runtime": 115.9169,
      "eval_samples_per_second": 86.269,
      "eval_steps_per_second": 5.392,
      "step": 131000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.583413384566123e-05,
      "loss": 3.2357,
      "step": 131100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.583093179634967e-05,
      "loss": 3.1923,
      "step": 131200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.582772974703811e-05,
      "loss": 3.2307,
      "step": 131300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.5824527697726547e-05,
      "loss": 3.2026,
      "step": 131400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.5821325648414986e-05,
      "loss": 3.1958,
      "step": 131500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.581812359910343e-05,
      "loss": 3.1993,
      "step": 131600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.5814921549791866e-05,
      "loss": 3.2077,
      "step": 131700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.581171950048031e-05,
      "loss": 3.1977,
      "step": 131800
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.5808517451168745e-05,
      "loss": 3.2157,
      "step": 131900
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.580531540185719e-05,
      "loss": 3.1589,
      "step": 132000
    },
    {
      "epoch": 0.08,
      "eval_loss": 2.8476622104644775,
      "eval_runtime": 116.1384,
      "eval_samples_per_second": 86.104,
      "eval_steps_per_second": 5.382,
      "step": 132000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.580211335254563e-05,
      "loss": 3.209,
      "step": 132100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.579891130323407e-05,
      "loss": 3.1712,
      "step": 132200
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.579570925392251e-05,
      "loss": 3.1978,
      "step": 132300
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.579250720461095e-05,
      "loss": 3.1785,
      "step": 132400
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.57893051552994e-05,
      "loss": 3.2065,
      "step": 132500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.578610310598783e-05,
      "loss": 3.2269,
      "step": 132600
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.578290105667628e-05,
      "loss": 3.2274,
      "step": 132700
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.5779699007364716e-05,
      "loss": 3.2239,
      "step": 132800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5776496958053156e-05,
      "loss": 3.2204,
      "step": 132900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5773294908741596e-05,
      "loss": 3.1865,
      "step": 133000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.8516881465911865,
      "eval_runtime": 110.4564,
      "eval_samples_per_second": 90.533,
      "eval_steps_per_second": 5.658,
      "step": 133000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5770092859430036e-05,
      "loss": 3.1984,
      "step": 133100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.576689081011848e-05,
      "loss": 3.2207,
      "step": 133200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5763688760806915e-05,
      "loss": 3.1931,
      "step": 133300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.576048671149536e-05,
      "loss": 3.2162,
      "step": 133400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.57572846621838e-05,
      "loss": 3.2096,
      "step": 133500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.575408261287224e-05,
      "loss": 3.2031,
      "step": 133600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.575088056356068e-05,
      "loss": 3.1862,
      "step": 133700
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.574767851424912e-05,
      "loss": 3.2195,
      "step": 133800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.574447646493756e-05,
      "loss": 3.1982,
      "step": 133900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5741274415626e-05,
      "loss": 3.1851,
      "step": 134000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.8491570949554443,
      "eval_runtime": 111.715,
      "eval_samples_per_second": 89.513,
      "eval_steps_per_second": 5.595,
      "step": 134000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5738072366314447e-05,
      "loss": 3.1766,
      "step": 134100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.573487031700288e-05,
      "loss": 3.197,
      "step": 134200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5731668267691326e-05,
      "loss": 3.2294,
      "step": 134300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5728466218379766e-05,
      "loss": 3.2137,
      "step": 134400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5725264169068206e-05,
      "loss": 3.2062,
      "step": 134500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5722062119756645e-05,
      "loss": 3.2009,
      "step": 134600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5718860070445085e-05,
      "loss": 3.1677,
      "step": 134700
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.571565802113353e-05,
      "loss": 3.1868,
      "step": 134800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5712455971821964e-05,
      "loss": 3.1775,
      "step": 134900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.570925392251041e-05,
      "loss": 3.2088,
      "step": 135000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.8468475341796875,
      "eval_runtime": 121.8855,
      "eval_samples_per_second": 82.044,
      "eval_steps_per_second": 5.128,
      "step": 135000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.570605187319885e-05,
      "loss": 3.2086,
      "step": 135100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.570284982388729e-05,
      "loss": 3.2003,
      "step": 135200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.569964777457573e-05,
      "loss": 3.19,
      "step": 135300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.569644572526417e-05,
      "loss": 3.1884,
      "step": 135400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.569324367595261e-05,
      "loss": 3.2118,
      "step": 135500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.569004162664105e-05,
      "loss": 3.1801,
      "step": 135600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5686839577329496e-05,
      "loss": 3.1664,
      "step": 135700
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5683637528017936e-05,
      "loss": 3.1908,
      "step": 135800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5680435478706375e-05,
      "loss": 3.1511,
      "step": 135900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5677233429394815e-05,
      "loss": 3.1812,
      "step": 136000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.849447250366211,
      "eval_runtime": 121.3154,
      "eval_samples_per_second": 82.43,
      "eval_steps_per_second": 5.152,
      "step": 136000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5674031380083255e-05,
      "loss": 3.1843,
      "step": 136100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5670829330771695e-05,
      "loss": 3.2222,
      "step": 136200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5667627281460134e-05,
      "loss": 3.1815,
      "step": 136300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.566442523214858e-05,
      "loss": 3.2089,
      "step": 136400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5661223182837014e-05,
      "loss": 3.1956,
      "step": 136500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.565802113352546e-05,
      "loss": 3.1967,
      "step": 136600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.56548190842139e-05,
      "loss": 3.2151,
      "step": 136700
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.565161703490234e-05,
      "loss": 3.1708,
      "step": 136800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.564841498559078e-05,
      "loss": 3.1845,
      "step": 136900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.564521293627922e-05,
      "loss": 3.2011,
      "step": 137000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.844717264175415,
      "eval_runtime": 110.9992,
      "eval_samples_per_second": 90.091,
      "eval_steps_per_second": 5.631,
      "step": 137000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.564201088696766e-05,
      "loss": 3.1881,
      "step": 137100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.56388088376561e-05,
      "loss": 3.2113,
      "step": 137200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5635606788344545e-05,
      "loss": 3.2171,
      "step": 137300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5632404739032985e-05,
      "loss": 3.2125,
      "step": 137400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5629202689721425e-05,
      "loss": 3.1911,
      "step": 137500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5626000640409865e-05,
      "loss": 3.2089,
      "step": 137600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5622798591098304e-05,
      "loss": 3.1809,
      "step": 137700
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5619596541786744e-05,
      "loss": 3.1827,
      "step": 137800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5616394492475184e-05,
      "loss": 3.1905,
      "step": 137900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.561319244316363e-05,
      "loss": 3.1875,
      "step": 138000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.8435604572296143,
      "eval_runtime": 111.9187,
      "eval_samples_per_second": 89.351,
      "eval_steps_per_second": 5.584,
      "step": 138000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.560999039385207e-05,
      "loss": 3.2111,
      "step": 138100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.560678834454051e-05,
      "loss": 3.2123,
      "step": 138200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.560358629522895e-05,
      "loss": 3.1964,
      "step": 138300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.560038424591739e-05,
      "loss": 3.1984,
      "step": 138400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.559718219660583e-05,
      "loss": 3.2023,
      "step": 138500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.559398014729427e-05,
      "loss": 3.2121,
      "step": 138600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.559077809798271e-05,
      "loss": 3.2325,
      "step": 138700
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.558757604867115e-05,
      "loss": 3.1887,
      "step": 138800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5584373999359595e-05,
      "loss": 3.2066,
      "step": 138900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5581171950048034e-05,
      "loss": 3.1931,
      "step": 139000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.8434531688690186,
      "eval_runtime": 111.3944,
      "eval_samples_per_second": 89.771,
      "eval_steps_per_second": 5.611,
      "step": 139000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5577969900736474e-05,
      "loss": 3.2191,
      "step": 139100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5574767851424914e-05,
      "loss": 3.2013,
      "step": 139200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5571565802113354e-05,
      "loss": 3.199,
      "step": 139300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.556836375280179e-05,
      "loss": 3.1988,
      "step": 139400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.556516170349023e-05,
      "loss": 3.2022,
      "step": 139500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.556195965417868e-05,
      "loss": 3.1808,
      "step": 139600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.555875760486712e-05,
      "loss": 3.1741,
      "step": 139700
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.555555555555556e-05,
      "loss": 3.2134,
      "step": 139800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5552353506244e-05,
      "loss": 3.1689,
      "step": 139900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.554915145693244e-05,
      "loss": 3.2111,
      "step": 140000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.8420772552490234,
      "eval_runtime": 116.7148,
      "eval_samples_per_second": 85.679,
      "eval_steps_per_second": 5.355,
      "step": 140000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.554594940762088e-05,
      "loss": 3.2177,
      "step": 140100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.554274735830932e-05,
      "loss": 3.181,
      "step": 140200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.553954530899776e-05,
      "loss": 3.1943,
      "step": 140300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5536343259686204e-05,
      "loss": 3.2017,
      "step": 140400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5533141210374644e-05,
      "loss": 3.2047,
      "step": 140500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5529939161063084e-05,
      "loss": 3.1858,
      "step": 140600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5526737111751524e-05,
      "loss": 3.1981,
      "step": 140700
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.552353506243996e-05,
      "loss": 3.2394,
      "step": 140800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.55203330131284e-05,
      "loss": 3.2236,
      "step": 140900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.551713096381684e-05,
      "loss": 3.1672,
      "step": 141000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.8450160026550293,
      "eval_runtime": 116.6118,
      "eval_samples_per_second": 85.755,
      "eval_steps_per_second": 5.36,
      "step": 141000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.551392891450529e-05,
      "loss": 3.1936,
      "step": 141100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.551072686519373e-05,
      "loss": 3.1869,
      "step": 141200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.550752481588217e-05,
      "loss": 3.163,
      "step": 141300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.550432276657061e-05,
      "loss": 3.1697,
      "step": 141400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.550112071725905e-05,
      "loss": 3.1855,
      "step": 141500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.549791866794749e-05,
      "loss": 3.1859,
      "step": 141600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.549471661863593e-05,
      "loss": 3.1851,
      "step": 141700
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.549151456932437e-05,
      "loss": 3.1696,
      "step": 141800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.548831252001281e-05,
      "loss": 3.2093,
      "step": 141900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5485110470701254e-05,
      "loss": 3.2264,
      "step": 142000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.8409953117370605,
      "eval_runtime": 112.4676,
      "eval_samples_per_second": 88.915,
      "eval_steps_per_second": 5.557,
      "step": 142000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5481908421389693e-05,
      "loss": 3.1725,
      "step": 142100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.547870637207813e-05,
      "loss": 3.2066,
      "step": 142200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.547550432276657e-05,
      "loss": 3.213,
      "step": 142300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.547230227345501e-05,
      "loss": 3.1801,
      "step": 142400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.546910022414345e-05,
      "loss": 3.1865,
      "step": 142500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.546589817483189e-05,
      "loss": 3.2172,
      "step": 142600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.546269612552034e-05,
      "loss": 3.2011,
      "step": 142700
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.545949407620878e-05,
      "loss": 3.1885,
      "step": 142800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.545629202689722e-05,
      "loss": 3.1924,
      "step": 142900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.545308997758566e-05,
      "loss": 3.1828,
      "step": 143000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.8408474922180176,
      "eval_runtime": 112.3346,
      "eval_samples_per_second": 89.02,
      "eval_steps_per_second": 5.564,
      "step": 143000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.54498879282741e-05,
      "loss": 3.211,
      "step": 143100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.544668587896254e-05,
      "loss": 3.1768,
      "step": 143200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.544348382965098e-05,
      "loss": 3.2011,
      "step": 143300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5440281780339424e-05,
      "loss": 3.1907,
      "step": 143400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5437079731027857e-05,
      "loss": 3.1968,
      "step": 143500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.54338776817163e-05,
      "loss": 3.18,
      "step": 143600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.543067563240474e-05,
      "loss": 3.1812,
      "step": 143700
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.542747358309318e-05,
      "loss": 3.1926,
      "step": 143800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.542427153378162e-05,
      "loss": 3.1753,
      "step": 143900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.542106948447006e-05,
      "loss": 3.1771,
      "step": 144000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.8427858352661133,
      "eval_runtime": 110.7461,
      "eval_samples_per_second": 90.297,
      "eval_steps_per_second": 5.644,
      "step": 144000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.54178674351585e-05,
      "loss": 3.1833,
      "step": 144100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.541466538584694e-05,
      "loss": 3.1563,
      "step": 144200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.541146333653539e-05,
      "loss": 3.1822,
      "step": 144300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.540826128722383e-05,
      "loss": 3.167,
      "step": 144400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.540505923791227e-05,
      "loss": 3.1613,
      "step": 144500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.540185718860071e-05,
      "loss": 3.1796,
      "step": 144600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.539865513928915e-05,
      "loss": 3.1931,
      "step": 144700
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.539545308997759e-05,
      "loss": 3.1872,
      "step": 144800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5392251040666026e-05,
      "loss": 3.1922,
      "step": 144900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.538904899135447e-05,
      "loss": 3.2019,
      "step": 145000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.8369483947753906,
      "eval_runtime": 110.659,
      "eval_samples_per_second": 90.368,
      "eval_steps_per_second": 5.648,
      "step": 145000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5385846942042906e-05,
      "loss": 3.1815,
      "step": 145100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.538264489273135e-05,
      "loss": 3.1882,
      "step": 145200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.537944284341979e-05,
      "loss": 3.2078,
      "step": 145300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.537624079410823e-05,
      "loss": 3.197,
      "step": 145400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.537303874479667e-05,
      "loss": 3.1899,
      "step": 145500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.536983669548511e-05,
      "loss": 3.1679,
      "step": 145600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.536663464617356e-05,
      "loss": 3.1734,
      "step": 145700
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.536343259686199e-05,
      "loss": 3.2048,
      "step": 145800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.536023054755044e-05,
      "loss": 3.1898,
      "step": 145900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.535702849823888e-05,
      "loss": 3.1786,
      "step": 146000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.8361101150512695,
      "eval_runtime": 111.2771,
      "eval_samples_per_second": 89.866,
      "eval_steps_per_second": 5.617,
      "step": 146000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.535382644892732e-05,
      "loss": 3.1405,
      "step": 146100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5350624399615757e-05,
      "loss": 3.1919,
      "step": 146200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5347422350304196e-05,
      "loss": 3.1704,
      "step": 146300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5344220300992636e-05,
      "loss": 3.211,
      "step": 146400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5341018251681076e-05,
      "loss": 3.1532,
      "step": 146500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.533781620236952e-05,
      "loss": 3.1899,
      "step": 146600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5334614153057955e-05,
      "loss": 3.1681,
      "step": 146700
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.53314121037464e-05,
      "loss": 3.1651,
      "step": 146800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.532821005443484e-05,
      "loss": 3.1875,
      "step": 146900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.532500800512328e-05,
      "loss": 3.1604,
      "step": 147000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.839078664779663,
      "eval_runtime": 118.8779,
      "eval_samples_per_second": 84.12,
      "eval_steps_per_second": 5.257,
      "step": 147000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.532180595581172e-05,
      "loss": 3.2036,
      "step": 147100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.531860390650016e-05,
      "loss": 3.1542,
      "step": 147200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.531540185718861e-05,
      "loss": 3.1766,
      "step": 147300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.531219980787704e-05,
      "loss": 3.2046,
      "step": 147400
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.530899775856549e-05,
      "loss": 3.1669,
      "step": 147500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5305795709253926e-05,
      "loss": 3.1656,
      "step": 147600
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5302593659942366e-05,
      "loss": 3.2158,
      "step": 147700
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5299391610630806e-05,
      "loss": 3.1876,
      "step": 147800
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5296189561319246e-05,
      "loss": 3.173,
      "step": 147900
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.529298751200769e-05,
      "loss": 3.1821,
      "step": 148000
    },
    {
      "epoch": 0.09,
      "eval_loss": 2.8335037231445312,
      "eval_runtime": 120.6707,
      "eval_samples_per_second": 82.87,
      "eval_steps_per_second": 5.179,
      "step": 148000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5289785462696125e-05,
      "loss": 3.186,
      "step": 148100
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.528658341338457e-05,
      "loss": 3.2064,
      "step": 148200
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.5283381364073005e-05,
      "loss": 3.1599,
      "step": 148300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.528017931476145e-05,
      "loss": 3.1551,
      "step": 148400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.527697726544989e-05,
      "loss": 3.1807,
      "step": 148500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.527377521613833e-05,
      "loss": 3.1663,
      "step": 148600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.527057316682678e-05,
      "loss": 3.1781,
      "step": 148700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.526737111751521e-05,
      "loss": 3.1427,
      "step": 148800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.526416906820366e-05,
      "loss": 3.1756,
      "step": 148900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.526096701889209e-05,
      "loss": 3.1651,
      "step": 149000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.8390142917633057,
      "eval_runtime": 110.6371,
      "eval_samples_per_second": 90.386,
      "eval_steps_per_second": 5.649,
      "step": 149000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5257764969580536e-05,
      "loss": 3.161,
      "step": 149100
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5254562920268976e-05,
      "loss": 3.2211,
      "step": 149200
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5251360870957416e-05,
      "loss": 3.1879,
      "step": 149300
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5248158821645855e-05,
      "loss": 3.2077,
      "step": 149400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5244956772334295e-05,
      "loss": 3.1453,
      "step": 149500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.524175472302274e-05,
      "loss": 3.1892,
      "step": 149600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5238552673711175e-05,
      "loss": 3.1637,
      "step": 149700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.523535062439962e-05,
      "loss": 3.2033,
      "step": 149800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5232148575088054e-05,
      "loss": 3.1891,
      "step": 149900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.52289465257765e-05,
      "loss": 3.1716,
      "step": 150000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.8338587284088135,
      "eval_runtime": 111.0628,
      "eval_samples_per_second": 90.039,
      "eval_steps_per_second": 5.627,
      "step": 150000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.522574447646494e-05,
      "loss": 3.1753,
      "step": 150100
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.522254242715338e-05,
      "loss": 3.1596,
      "step": 150200
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5219340377841827e-05,
      "loss": 3.1685,
      "step": 150300
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.521613832853026e-05,
      "loss": 3.1863,
      "step": 150400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5212936279218706e-05,
      "loss": 3.1545,
      "step": 150500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.520973422990714e-05,
      "loss": 3.1769,
      "step": 150600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5206532180595585e-05,
      "loss": 3.1415,
      "step": 150700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5203330131284025e-05,
      "loss": 3.186,
      "step": 150800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5200128081972465e-05,
      "loss": 3.1851,
      "step": 150900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5196926032660905e-05,
      "loss": 3.1607,
      "step": 151000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.832872152328491,
      "eval_runtime": 111.9229,
      "eval_samples_per_second": 89.347,
      "eval_steps_per_second": 5.584,
      "step": 151000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5193723983349344e-05,
      "loss": 3.1304,
      "step": 151100
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.519052193403779e-05,
      "loss": 3.1556,
      "step": 151200
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5187319884726224e-05,
      "loss": 3.1289,
      "step": 151300
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.518411783541467e-05,
      "loss": 3.1548,
      "step": 151400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.51809157861031e-05,
      "loss": 3.1523,
      "step": 151500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.517771373679155e-05,
      "loss": 3.1798,
      "step": 151600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.517451168747999e-05,
      "loss": 3.1696,
      "step": 151700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.517130963816843e-05,
      "loss": 3.1764,
      "step": 151800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5168107588856876e-05,
      "loss": 3.1782,
      "step": 151900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.516490553954531e-05,
      "loss": 3.1941,
      "step": 152000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.8348934650421143,
      "eval_runtime": 110.3326,
      "eval_samples_per_second": 90.635,
      "eval_steps_per_second": 5.665,
      "step": 152000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5161703490233755e-05,
      "loss": 3.1878,
      "step": 152100
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.515850144092219e-05,
      "loss": 3.1704,
      "step": 152200
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5155299391610635e-05,
      "loss": 3.1828,
      "step": 152300
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.515209734229907e-05,
      "loss": 3.177,
      "step": 152400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5148895292987514e-05,
      "loss": 3.1918,
      "step": 152500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5145693243675954e-05,
      "loss": 3.1869,
      "step": 152600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5142491194364394e-05,
      "loss": 3.1207,
      "step": 152700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.513928914505284e-05,
      "loss": 3.1945,
      "step": 152800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.513608709574127e-05,
      "loss": 3.158,
      "step": 152900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.513288504642972e-05,
      "loss": 3.1892,
      "step": 153000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.8355162143707275,
      "eval_runtime": 110.1623,
      "eval_samples_per_second": 90.775,
      "eval_steps_per_second": 5.673,
      "step": 153000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.512968299711815e-05,
      "loss": 3.1893,
      "step": 153100
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.51264809478066e-05,
      "loss": 3.1842,
      "step": 153200
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.512327889849504e-05,
      "loss": 3.1876,
      "step": 153300
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.512007684918348e-05,
      "loss": 3.1759,
      "step": 153400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5116874799871925e-05,
      "loss": 3.1632,
      "step": 153500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.511367275056036e-05,
      "loss": 3.1696,
      "step": 153600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5110470701248805e-05,
      "loss": 3.1621,
      "step": 153700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.510726865193724e-05,
      "loss": 3.1665,
      "step": 153800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5104066602625684e-05,
      "loss": 3.1582,
      "step": 153900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.510086455331412e-05,
      "loss": 3.132,
      "step": 154000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.8362460136413574,
      "eval_runtime": 109.946,
      "eval_samples_per_second": 90.954,
      "eval_steps_per_second": 5.685,
      "step": 154000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5097662504002564e-05,
      "loss": 3.1874,
      "step": 154100
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5094460454691003e-05,
      "loss": 3.1578,
      "step": 154200
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.509125840537944e-05,
      "loss": 3.1786,
      "step": 154300
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.508805635606789e-05,
      "loss": 3.1683,
      "step": 154400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.508485430675632e-05,
      "loss": 3.1707,
      "step": 154500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.508165225744477e-05,
      "loss": 3.1709,
      "step": 154600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.50784502081332e-05,
      "loss": 3.1739,
      "step": 154700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.507524815882165e-05,
      "loss": 3.1743,
      "step": 154800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.507204610951009e-05,
      "loss": 3.1866,
      "step": 154900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.506884406019853e-05,
      "loss": 3.1826,
      "step": 155000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.8315913677215576,
      "eval_runtime": 112.1421,
      "eval_samples_per_second": 89.173,
      "eval_steps_per_second": 5.573,
      "step": 155000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5065642010886975e-05,
      "loss": 3.1744,
      "step": 155100
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.506243996157541e-05,
      "loss": 3.1902,
      "step": 155200
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5059237912263854e-05,
      "loss": 3.1797,
      "step": 155300
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.505603586295229e-05,
      "loss": 3.1759,
      "step": 155400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5052833813640734e-05,
      "loss": 3.167,
      "step": 155500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.504963176432917e-05,
      "loss": 3.1718,
      "step": 155600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.504642971501761e-05,
      "loss": 3.1443,
      "step": 155700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.504322766570605e-05,
      "loss": 3.1681,
      "step": 155800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.504002561639449e-05,
      "loss": 3.1859,
      "step": 155900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.503682356708294e-05,
      "loss": 3.197,
      "step": 156000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.829352378845215,
      "eval_runtime": 112.9016,
      "eval_samples_per_second": 88.573,
      "eval_steps_per_second": 5.536,
      "step": 156000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.503362151777137e-05,
      "loss": 3.1705,
      "step": 156100
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.503041946845982e-05,
      "loss": 3.1893,
      "step": 156200
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.502721741914825e-05,
      "loss": 3.1829,
      "step": 156300
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.50240153698367e-05,
      "loss": 3.1832,
      "step": 156400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.502081332052514e-05,
      "loss": 3.1976,
      "step": 156500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.501761127121358e-05,
      "loss": 3.146,
      "step": 156600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5014409221902024e-05,
      "loss": 3.1723,
      "step": 156700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.501120717259046e-05,
      "loss": 3.1482,
      "step": 156800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5008005123278903e-05,
      "loss": 3.1613,
      "step": 156900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.5004803073967336e-05,
      "loss": 3.1766,
      "step": 157000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.829166889190674,
      "eval_runtime": 118.2975,
      "eval_samples_per_second": 84.533,
      "eval_steps_per_second": 5.283,
      "step": 157000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.500160102465578e-05,
      "loss": 3.1866,
      "step": 157100
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.499839897534422e-05,
      "loss": 3.1691,
      "step": 157200
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.499519692603266e-05,
      "loss": 3.2011,
      "step": 157300
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.49919948767211e-05,
      "loss": 3.1907,
      "step": 157400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.498879282740954e-05,
      "loss": 3.1926,
      "step": 157500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.498559077809799e-05,
      "loss": 3.1755,
      "step": 157600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.498238872878642e-05,
      "loss": 3.1921,
      "step": 157700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.497918667947487e-05,
      "loss": 3.1651,
      "step": 157800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.497598463016331e-05,
      "loss": 3.1773,
      "step": 157900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.497278258085175e-05,
      "loss": 3.1452,
      "step": 158000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.830792188644409,
      "eval_runtime": 118.0556,
      "eval_samples_per_second": 84.706,
      "eval_steps_per_second": 5.294,
      "step": 158000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.496958053154019e-05,
      "loss": 3.1571,
      "step": 158100
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.496637848222863e-05,
      "loss": 3.1738,
      "step": 158200
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.496317643291707e-05,
      "loss": 3.153,
      "step": 158300
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4959974383605506e-05,
      "loss": 3.1574,
      "step": 158400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.495677233429395e-05,
      "loss": 3.1626,
      "step": 158500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.495357028498239e-05,
      "loss": 3.1436,
      "step": 158600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.495036823567083e-05,
      "loss": 3.1782,
      "step": 158700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.494716618635927e-05,
      "loss": 3.1539,
      "step": 158800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.494396413704771e-05,
      "loss": 3.1984,
      "step": 158900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.494076208773615e-05,
      "loss": 3.1715,
      "step": 159000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.8291139602661133,
      "eval_runtime": 118.4841,
      "eval_samples_per_second": 84.4,
      "eval_steps_per_second": 5.275,
      "step": 159000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.493756003842459e-05,
      "loss": 3.1636,
      "step": 159100
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.493435798911304e-05,
      "loss": 3.1816,
      "step": 159200
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.493115593980147e-05,
      "loss": 3.1827,
      "step": 159300
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.492795389048992e-05,
      "loss": 3.1679,
      "step": 159400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.492475184117836e-05,
      "loss": 3.1483,
      "step": 159500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.49215497918668e-05,
      "loss": 3.1504,
      "step": 159600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4918347742555236e-05,
      "loss": 3.1587,
      "step": 159700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4915145693243676e-05,
      "loss": 3.1648,
      "step": 159800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.491194364393212e-05,
      "loss": 3.175,
      "step": 159900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4908741594620556e-05,
      "loss": 3.1653,
      "step": 160000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.8284478187561035,
      "eval_runtime": 117.7347,
      "eval_samples_per_second": 84.937,
      "eval_steps_per_second": 5.309,
      "step": 160000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4905539545309e-05,
      "loss": 3.1554,
      "step": 160100
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.490233749599744e-05,
      "loss": 3.1412,
      "step": 160200
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.489913544668588e-05,
      "loss": 3.1952,
      "step": 160300
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.489593339737432e-05,
      "loss": 3.1505,
      "step": 160400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.489273134806276e-05,
      "loss": 3.1652,
      "step": 160500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.48895292987512e-05,
      "loss": 3.163,
      "step": 160600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.488632724943964e-05,
      "loss": 3.1765,
      "step": 160700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.488312520012809e-05,
      "loss": 3.1868,
      "step": 160800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.487992315081653e-05,
      "loss": 3.1736,
      "step": 160900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.487672110150497e-05,
      "loss": 3.1613,
      "step": 161000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.828770399093628,
      "eval_runtime": 116.9493,
      "eval_samples_per_second": 85.507,
      "eval_steps_per_second": 5.344,
      "step": 161000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4873519052193406e-05,
      "loss": 3.1652,
      "step": 161100
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4870317002881846e-05,
      "loss": 3.1225,
      "step": 161200
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4867114953570286e-05,
      "loss": 3.167,
      "step": 161300
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4863912904258726e-05,
      "loss": 3.1773,
      "step": 161400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.486071085494717e-05,
      "loss": 3.1443,
      "step": 161500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4857508805635605e-05,
      "loss": 3.1729,
      "step": 161600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.485430675632405e-05,
      "loss": 3.1501,
      "step": 161700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.485110470701249e-05,
      "loss": 3.1932,
      "step": 161800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.484790265770093e-05,
      "loss": 3.1383,
      "step": 161900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.484470060838937e-05,
      "loss": 3.1624,
      "step": 162000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.8323256969451904,
      "eval_runtime": 116.7555,
      "eval_samples_per_second": 85.649,
      "eval_steps_per_second": 5.353,
      "step": 162000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.484149855907781e-05,
      "loss": 3.165,
      "step": 162100
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.483829650976625e-05,
      "loss": 3.1659,
      "step": 162200
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.483509446045469e-05,
      "loss": 3.165,
      "step": 162300
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4831892411143137e-05,
      "loss": 3.1482,
      "step": 162400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4828690361831576e-05,
      "loss": 3.1604,
      "step": 162500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4825488312520016e-05,
      "loss": 3.1898,
      "step": 162600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4822286263208456e-05,
      "loss": 3.1699,
      "step": 162700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4819084213896896e-05,
      "loss": 3.1908,
      "step": 162800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4815882164585335e-05,
      "loss": 3.1641,
      "step": 162900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4812680115273775e-05,
      "loss": 3.1426,
      "step": 163000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.828151226043701,
      "eval_runtime": 117.3101,
      "eval_samples_per_second": 85.244,
      "eval_steps_per_second": 5.328,
      "step": 163000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.480947806596222e-05,
      "loss": 3.174,
      "step": 163100
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.480627601665066e-05,
      "loss": 3.1507,
      "step": 163200
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.48030739673391e-05,
      "loss": 3.1552,
      "step": 163300
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.479987191802754e-05,
      "loss": 3.1565,
      "step": 163400
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.479666986871598e-05,
      "loss": 3.1489,
      "step": 163500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.479346781940442e-05,
      "loss": 3.1412,
      "step": 163600
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.479026577009286e-05,
      "loss": 3.1871,
      "step": 163700
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.47870637207813e-05,
      "loss": 3.187,
      "step": 163800
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.478386167146974e-05,
      "loss": 3.1685,
      "step": 163900
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.4780659622158186e-05,
      "loss": 3.1447,
      "step": 164000
    },
    {
      "epoch": 0.1,
      "eval_loss": 2.8244762420654297,
      "eval_runtime": 117.631,
      "eval_samples_per_second": 85.012,
      "eval_steps_per_second": 5.313,
      "step": 164000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4777457572846626e-05,
      "loss": 3.1474,
      "step": 164100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4774255523535065e-05,
      "loss": 3.1545,
      "step": 164200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4771053474223505e-05,
      "loss": 3.182,
      "step": 164300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4767851424911945e-05,
      "loss": 3.1458,
      "step": 164400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4764649375600385e-05,
      "loss": 3.1791,
      "step": 164500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4761447326288824e-05,
      "loss": 3.1498,
      "step": 164600
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.475824527697727e-05,
      "loss": 3.1451,
      "step": 164700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.475504322766571e-05,
      "loss": 3.1569,
      "step": 164800
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.475184117835415e-05,
      "loss": 3.1221,
      "step": 164900
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.474863912904259e-05,
      "loss": 3.1683,
      "step": 165000
    },
    {
      "epoch": 0.11,
      "eval_loss": 2.8249683380126953,
      "eval_runtime": 117.7904,
      "eval_samples_per_second": 84.897,
      "eval_steps_per_second": 5.306,
      "step": 165000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.474543707973103e-05,
      "loss": 3.141,
      "step": 165100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.474223503041947e-05,
      "loss": 3.1262,
      "step": 165200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.473903298110791e-05,
      "loss": 3.1458,
      "step": 165300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.473583093179635e-05,
      "loss": 3.1488,
      "step": 165400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4732628882484796e-05,
      "loss": 3.1583,
      "step": 165500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4729426833173235e-05,
      "loss": 3.1657,
      "step": 165600
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4726224783861675e-05,
      "loss": 3.1826,
      "step": 165700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4723022734550115e-05,
      "loss": 3.1582,
      "step": 165800
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4719820685238555e-05,
      "loss": 3.1579,
      "step": 165900
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4716618635926994e-05,
      "loss": 3.1456,
      "step": 166000
    },
    {
      "epoch": 0.11,
      "eval_loss": 2.8269667625427246,
      "eval_runtime": 120.1228,
      "eval_samples_per_second": 83.248,
      "eval_steps_per_second": 5.203,
      "step": 166000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4713416586615434e-05,
      "loss": 3.1461,
      "step": 166100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4710214537303874e-05,
      "loss": 3.1707,
      "step": 166200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.470701248799232e-05,
      "loss": 3.1207,
      "step": 166300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.470381043868076e-05,
      "loss": 3.1666,
      "step": 166400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.47006083893692e-05,
      "loss": 3.1568,
      "step": 166500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.469740634005764e-05,
      "loss": 3.1637,
      "step": 166600
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.469420429074608e-05,
      "loss": 3.162,
      "step": 166700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.469100224143452e-05,
      "loss": 3.143,
      "step": 166800
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.468780019212296e-05,
      "loss": 3.1455,
      "step": 166900
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.46845981428114e-05,
      "loss": 3.1684,
      "step": 167000
    },
    {
      "epoch": 0.11,
      "eval_loss": 2.820765972137451,
      "eval_runtime": 121.3102,
      "eval_samples_per_second": 82.433,
      "eval_steps_per_second": 5.152,
      "step": 167000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4681396093499845e-05,
      "loss": 3.1309,
      "step": 167100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4678194044188285e-05,
      "loss": 3.1259,
      "step": 167200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4674991994876724e-05,
      "loss": 3.1343,
      "step": 167300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4671789945565164e-05,
      "loss": 3.1451,
      "step": 167400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4668587896253604e-05,
      "loss": 3.1726,
      "step": 167500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4665385846942044e-05,
      "loss": 3.1601,
      "step": 167600
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.466218379763048e-05,
      "loss": 3.1498,
      "step": 167700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.465898174831893e-05,
      "loss": 3.1232,
      "step": 167800
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.465577969900737e-05,
      "loss": 3.138,
      "step": 167900
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.465257764969581e-05,
      "loss": 3.1567,
      "step": 168000
    },
    {
      "epoch": 0.11,
      "eval_loss": 2.8227803707122803,
      "eval_runtime": 118.9833,
      "eval_samples_per_second": 84.045,
      "eval_steps_per_second": 5.253,
      "step": 168000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.464937560038425e-05,
      "loss": 3.1274,
      "step": 168100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.464617355107269e-05,
      "loss": 3.1587,
      "step": 168200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.464297150176113e-05,
      "loss": 3.1577,
      "step": 168300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.463976945244957e-05,
      "loss": 3.1432,
      "step": 168400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4636567403138015e-05,
      "loss": 3.1449,
      "step": 168500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.463336535382645e-05,
      "loss": 3.1487,
      "step": 168600
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4630163304514894e-05,
      "loss": 3.133,
      "step": 168700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4626961255203334e-05,
      "loss": 3.1752,
      "step": 168800
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4623759205891774e-05,
      "loss": 3.1593,
      "step": 168900
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4620557156580214e-05,
      "loss": 3.1398,
      "step": 169000
    },
    {
      "epoch": 0.11,
      "eval_loss": 2.8240785598754883,
      "eval_runtime": 118.6795,
      "eval_samples_per_second": 84.261,
      "eval_steps_per_second": 5.266,
      "step": 169000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.461735510726865e-05,
      "loss": 3.1536,
      "step": 169100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.461415305795709e-05,
      "loss": 3.1683,
      "step": 169200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.461095100864553e-05,
      "loss": 3.1113,
      "step": 169300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.460774895933398e-05,
      "loss": 3.1132,
      "step": 169400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.460454691002241e-05,
      "loss": 3.1649,
      "step": 169500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.460134486071086e-05,
      "loss": 3.1493,
      "step": 169600
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.45981428113993e-05,
      "loss": 3.1625,
      "step": 169700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.459494076208774e-05,
      "loss": 3.1557,
      "step": 169800
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.459173871277618e-05,
      "loss": 3.1544,
      "step": 169900
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.458853666346462e-05,
      "loss": 3.1201,
      "step": 170000
    },
    {
      "epoch": 0.11,
      "eval_loss": 2.820582389831543,
      "eval_runtime": 117.8087,
      "eval_samples_per_second": 84.883,
      "eval_steps_per_second": 5.305,
      "step": 170000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4585334614153064e-05,
      "loss": 3.1476,
      "step": 170100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.45821325648415e-05,
      "loss": 3.1605,
      "step": 170200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4578930515529944e-05,
      "loss": 3.1496,
      "step": 170300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4575728466218383e-05,
      "loss": 3.1326,
      "step": 170400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.457252641690682e-05,
      "loss": 3.1139,
      "step": 170500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.456932436759526e-05,
      "loss": 3.1433,
      "step": 170600
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.45661223182837e-05,
      "loss": 3.1422,
      "step": 170700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.456292026897215e-05,
      "loss": 3.1174,
      "step": 170800
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.455971821966058e-05,
      "loss": 3.1659,
      "step": 170900
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.455651617034903e-05,
      "loss": 3.1287,
      "step": 171000
    },
    {
      "epoch": 0.11,
      "eval_loss": 2.820260524749756,
      "eval_runtime": 118.179,
      "eval_samples_per_second": 84.617,
      "eval_steps_per_second": 5.289,
      "step": 171000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.455331412103746e-05,
      "loss": 3.1421,
      "step": 171100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.455011207172591e-05,
      "loss": 3.1479,
      "step": 171200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.454691002241435e-05,
      "loss": 3.1463,
      "step": 171300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.454370797310279e-05,
      "loss": 3.1546,
      "step": 171400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.454050592379123e-05,
      "loss": 3.1228,
      "step": 171500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.453730387447967e-05,
      "loss": 3.1464,
      "step": 171600
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4534101825168114e-05,
      "loss": 3.1534,
      "step": 171700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4530899775856547e-05,
      "loss": 3.1496,
      "step": 171800
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.452769772654499e-05,
      "loss": 3.1316,
      "step": 171900
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.452449567723343e-05,
      "loss": 3.1553,
      "step": 172000
    },
    {
      "epoch": 0.11,
      "eval_loss": 2.822178840637207,
      "eval_runtime": 118.6205,
      "eval_samples_per_second": 84.302,
      "eval_steps_per_second": 5.269,
      "step": 172000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.452129362792187e-05,
      "loss": 3.1606,
      "step": 172100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.451809157861031e-05,
      "loss": 3.1689,
      "step": 172200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.451488952929875e-05,
      "loss": 3.1403,
      "step": 172300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.45116874799872e-05,
      "loss": 3.1541,
      "step": 172400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.450848543067563e-05,
      "loss": 3.1861,
      "step": 172500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.450528338136408e-05,
      "loss": 3.1652,
      "step": 172600
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.450208133205251e-05,
      "loss": 3.137,
      "step": 172700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.449887928274096e-05,
      "loss": 3.1603,
      "step": 172800
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.44956772334294e-05,
      "loss": 3.1386,
      "step": 172900
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.449247518411784e-05,
      "loss": 3.1532,
      "step": 173000
    },
    {
      "epoch": 0.11,
      "eval_loss": 2.816577434539795,
      "eval_runtime": 118.0079,
      "eval_samples_per_second": 84.74,
      "eval_steps_per_second": 5.296,
      "step": 173000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4489273134806283e-05,
      "loss": 3.1347,
      "step": 173100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4486071085494716e-05,
      "loss": 3.1489,
      "step": 173200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.448286903618316e-05,
      "loss": 3.1271,
      "step": 173300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4479666986871596e-05,
      "loss": 3.1301,
      "step": 173400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.447646493756004e-05,
      "loss": 3.148,
      "step": 173500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.447326288824848e-05,
      "loss": 3.11,
      "step": 173600
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.447006083893692e-05,
      "loss": 3.1402,
      "step": 173700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.446685878962536e-05,
      "loss": 3.1256,
      "step": 173800
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.44636567403138e-05,
      "loss": 3.1626,
      "step": 173900
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.446045469100225e-05,
      "loss": 3.1321,
      "step": 174000
    },
    {
      "epoch": 0.11,
      "eval_loss": 2.8151018619537354,
      "eval_runtime": 119.14,
      "eval_samples_per_second": 83.935,
      "eval_steps_per_second": 5.246,
      "step": 174000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.445725264169068e-05,
      "loss": 3.1617,
      "step": 174100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.445405059237913e-05,
      "loss": 3.1461,
      "step": 174200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.445084854306756e-05,
      "loss": 3.1466,
      "step": 174300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.444764649375601e-05,
      "loss": 3.1807,
      "step": 174400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 3.1708,
      "step": 174500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4441242395132886e-05,
      "loss": 3.1487,
      "step": 174600
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.443804034582133e-05,
      "loss": 3.1198,
      "step": 174700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4434838296509766e-05,
      "loss": 3.1699,
      "step": 174800
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.443163624719821e-05,
      "loss": 3.1501,
      "step": 174900
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4428434197886645e-05,
      "loss": 3.1188,
      "step": 175000
    },
    {
      "epoch": 0.11,
      "eval_loss": 2.815021276473999,
      "eval_runtime": 118.2781,
      "eval_samples_per_second": 84.546,
      "eval_steps_per_second": 5.284,
      "step": 175000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.442523214857509e-05,
      "loss": 3.1148,
      "step": 175100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.442203009926353e-05,
      "loss": 3.1489,
      "step": 175200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.441882804995197e-05,
      "loss": 3.1373,
      "step": 175300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.441562600064042e-05,
      "loss": 3.1115,
      "step": 175400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.441242395132885e-05,
      "loss": 3.1464,
      "step": 175500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.44092219020173e-05,
      "loss": 3.1468,
      "step": 175600
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.440601985270573e-05,
      "loss": 3.1315,
      "step": 175700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.440281780339418e-05,
      "loss": 3.1334,
      "step": 175800
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.439961575408261e-05,
      "loss": 3.127,
      "step": 175900
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4396413704771056e-05,
      "loss": 3.1628,
      "step": 176000
    },
    {
      "epoch": 0.11,
      "eval_loss": 2.815659761428833,
      "eval_runtime": 119.3766,
      "eval_samples_per_second": 83.769,
      "eval_steps_per_second": 5.236,
      "step": 176000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4393211655459496e-05,
      "loss": 3.1605,
      "step": 176100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4390009606147936e-05,
      "loss": 3.1606,
      "step": 176200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.438680755683638e-05,
      "loss": 3.1575,
      "step": 176300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4383605507524815e-05,
      "loss": 3.1558,
      "step": 176400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.438040345821326e-05,
      "loss": 3.1474,
      "step": 176500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4377201408901695e-05,
      "loss": 3.1245,
      "step": 176600
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.437399935959014e-05,
      "loss": 3.1358,
      "step": 176700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.437079731027858e-05,
      "loss": 3.112,
      "step": 176800
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.436759526096702e-05,
      "loss": 3.171,
      "step": 176900
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.436439321165547e-05,
      "loss": 3.1162,
      "step": 177000
    },
    {
      "epoch": 0.11,
      "eval_loss": 2.8179445266723633,
      "eval_runtime": 120.0235,
      "eval_samples_per_second": 83.317,
      "eval_steps_per_second": 5.207,
      "step": 177000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.43611911623439e-05,
      "loss": 3.1308,
      "step": 177100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.435798911303235e-05,
      "loss": 3.137,
      "step": 177200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.435478706372078e-05,
      "loss": 3.1315,
      "step": 177300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4351585014409226e-05,
      "loss": 3.1264,
      "step": 177400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.434838296509766e-05,
      "loss": 3.1566,
      "step": 177500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4345180915786106e-05,
      "loss": 3.1182,
      "step": 177600
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4341978866474545e-05,
      "loss": 3.1302,
      "step": 177700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4338776817162985e-05,
      "loss": 3.1555,
      "step": 177800
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.433557476785143e-05,
      "loss": 3.1318,
      "step": 177900
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4332372718539865e-05,
      "loss": 3.1412,
      "step": 178000
    },
    {
      "epoch": 0.11,
      "eval_loss": 2.817652940750122,
      "eval_runtime": 111.8707,
      "eval_samples_per_second": 89.389,
      "eval_steps_per_second": 5.587,
      "step": 178000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.432917066922831e-05,
      "loss": 3.164,
      "step": 178100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4325968619916744e-05,
      "loss": 3.1382,
      "step": 178200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.432276657060519e-05,
      "loss": 3.1056,
      "step": 178300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.431956452129363e-05,
      "loss": 3.1285,
      "step": 178400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.431636247198207e-05,
      "loss": 3.1385,
      "step": 178500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4313160422670517e-05,
      "loss": 3.1385,
      "step": 178600
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.430995837335895e-05,
      "loss": 3.1513,
      "step": 178700
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4306756324047396e-05,
      "loss": 3.1456,
      "step": 178800
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.430355427473583e-05,
      "loss": 3.151,
      "step": 178900
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4300352225424275e-05,
      "loss": 3.1104,
      "step": 179000
    },
    {
      "epoch": 0.11,
      "eval_loss": 2.8151299953460693,
      "eval_runtime": 110.6322,
      "eval_samples_per_second": 90.39,
      "eval_steps_per_second": 5.649,
      "step": 179000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.429715017611271e-05,
      "loss": 3.1186,
      "step": 179100
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4293948126801155e-05,
      "loss": 3.1276,
      "step": 179200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4290746077489595e-05,
      "loss": 3.1471,
      "step": 179300
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4287544028178034e-05,
      "loss": 3.1254,
      "step": 179400
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.428434197886648e-05,
      "loss": 3.1255,
      "step": 179500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.4281139929554914e-05,
      "loss": 3.1147,
      "step": 179600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.427793788024336e-05,
      "loss": 3.1568,
      "step": 179700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.427473583093179e-05,
      "loss": 3.1392,
      "step": 179800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.427153378162024e-05,
      "loss": 3.1492,
      "step": 179900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.426833173230868e-05,
      "loss": 3.1502,
      "step": 180000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.815356969833374,
      "eval_runtime": 110.5121,
      "eval_samples_per_second": 90.488,
      "eval_steps_per_second": 5.655,
      "step": 180000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.426512968299712e-05,
      "loss": 3.1196,
      "step": 180100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4261927633685566e-05,
      "loss": 3.1219,
      "step": 180200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4258725584374e-05,
      "loss": 3.1316,
      "step": 180300
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4255523535062445e-05,
      "loss": 3.1491,
      "step": 180400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.425232148575088e-05,
      "loss": 3.1346,
      "step": 180500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4249119436439325e-05,
      "loss": 3.1339,
      "step": 180600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4245917387127765e-05,
      "loss": 3.1446,
      "step": 180700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4242715337816204e-05,
      "loss": 3.1163,
      "step": 180800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4239513288504644e-05,
      "loss": 3.146,
      "step": 180900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4236311239193084e-05,
      "loss": 3.1259,
      "step": 181000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.8124563694000244,
      "eval_runtime": 110.9148,
      "eval_samples_per_second": 90.159,
      "eval_steps_per_second": 5.635,
      "step": 181000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.423310918988153e-05,
      "loss": 3.147,
      "step": 181100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.422990714056996e-05,
      "loss": 3.1101,
      "step": 181200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.422670509125841e-05,
      "loss": 3.1344,
      "step": 181300
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.422350304194684e-05,
      "loss": 3.1093,
      "step": 181400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.422030099263529e-05,
      "loss": 3.1424,
      "step": 181500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.421709894332373e-05,
      "loss": 3.1084,
      "step": 181600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.421389689401217e-05,
      "loss": 3.1607,
      "step": 181700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4210694844700615e-05,
      "loss": 3.1509,
      "step": 181800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.420749279538905e-05,
      "loss": 3.1526,
      "step": 181900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4204290746077495e-05,
      "loss": 3.1491,
      "step": 182000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.8145580291748047,
      "eval_runtime": 110.4879,
      "eval_samples_per_second": 90.508,
      "eval_steps_per_second": 5.657,
      "step": 182000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.420108869676593e-05,
      "loss": 3.1492,
      "step": 182100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4197886647454374e-05,
      "loss": 3.1436,
      "step": 182200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4194684598142814e-05,
      "loss": 3.1113,
      "step": 182300
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4191482548831254e-05,
      "loss": 3.1083,
      "step": 182400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4188280499519693e-05,
      "loss": 3.1043,
      "step": 182500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.418507845020813e-05,
      "loss": 3.1462,
      "step": 182600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.418187640089658e-05,
      "loss": 3.1223,
      "step": 182700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.417867435158501e-05,
      "loss": 3.1582,
      "step": 182800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.417547230227346e-05,
      "loss": 3.1519,
      "step": 182900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.41722702529619e-05,
      "loss": 3.1153,
      "step": 183000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.811768054962158,
      "eval_runtime": 109.9569,
      "eval_samples_per_second": 90.945,
      "eval_steps_per_second": 5.684,
      "step": 183000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.416906820365034e-05,
      "loss": 3.1397,
      "step": 183100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.416586615433878e-05,
      "loss": 3.108,
      "step": 183200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.416266410502722e-05,
      "loss": 3.1573,
      "step": 183300
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4159462055715665e-05,
      "loss": 3.1531,
      "step": 183400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.41562600064041e-05,
      "loss": 3.1096,
      "step": 183500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4153057957092544e-05,
      "loss": 3.1468,
      "step": 183600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.414985590778098e-05,
      "loss": 3.1252,
      "step": 183700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4146653858469424e-05,
      "loss": 3.1335,
      "step": 183800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.414345180915786e-05,
      "loss": 3.1395,
      "step": 183900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.41402497598463e-05,
      "loss": 3.1352,
      "step": 184000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.810725450515747,
      "eval_runtime": 109.9847,
      "eval_samples_per_second": 90.922,
      "eval_steps_per_second": 5.683,
      "step": 184000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.413704771053474e-05,
      "loss": 3.1394,
      "step": 184100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.413384566122318e-05,
      "loss": 3.1691,
      "step": 184200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.413064361191163e-05,
      "loss": 3.1038,
      "step": 184300
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.412744156260006e-05,
      "loss": 3.1084,
      "step": 184400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.412423951328851e-05,
      "loss": 3.118,
      "step": 184500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.412103746397695e-05,
      "loss": 3.1365,
      "step": 184600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.411783541466539e-05,
      "loss": 3.1238,
      "step": 184700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.411463336535383e-05,
      "loss": 3.1149,
      "step": 184800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.411143131604227e-05,
      "loss": 3.1559,
      "step": 184900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4108229266730714e-05,
      "loss": 3.1079,
      "step": 185000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.811378240585327,
      "eval_runtime": 109.8403,
      "eval_samples_per_second": 91.041,
      "eval_steps_per_second": 5.69,
      "step": 185000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.410502721741915e-05,
      "loss": 3.1319,
      "step": 185100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4101825168107593e-05,
      "loss": 3.1612,
      "step": 185200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.409862311879603e-05,
      "loss": 3.1298,
      "step": 185300
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.409542106948447e-05,
      "loss": 3.1004,
      "step": 185400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.409221902017291e-05,
      "loss": 3.1448,
      "step": 185500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.408901697086135e-05,
      "loss": 3.1391,
      "step": 185600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.408581492154979e-05,
      "loss": 3.1501,
      "step": 185700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.408261287223823e-05,
      "loss": 3.1093,
      "step": 185800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.407941082292668e-05,
      "loss": 3.1491,
      "step": 185900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.407620877361511e-05,
      "loss": 3.1302,
      "step": 186000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.8100852966308594,
      "eval_runtime": 110.279,
      "eval_samples_per_second": 90.679,
      "eval_steps_per_second": 5.667,
      "step": 186000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.407300672430356e-05,
      "loss": 3.1237,
      "step": 186100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4069804674992e-05,
      "loss": 3.1589,
      "step": 186200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.406660262568044e-05,
      "loss": 3.1216,
      "step": 186300
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.406340057636888e-05,
      "loss": 3.1237,
      "step": 186400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.406019852705732e-05,
      "loss": 3.1476,
      "step": 186500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.405699647774576e-05,
      "loss": 3.1515,
      "step": 186600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4053794428434196e-05,
      "loss": 3.1199,
      "step": 186700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.405059237912264e-05,
      "loss": 3.1413,
      "step": 186800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.404739032981108e-05,
      "loss": 3.1122,
      "step": 186900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.404418828049952e-05,
      "loss": 3.1245,
      "step": 187000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.8092496395111084,
      "eval_runtime": 120.0845,
      "eval_samples_per_second": 83.275,
      "eval_steps_per_second": 5.205,
      "step": 187000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.404098623118796e-05,
      "loss": 3.113,
      "step": 187100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.40377841818764e-05,
      "loss": 3.1366,
      "step": 187200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.403458213256484e-05,
      "loss": 3.1155,
      "step": 187300
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.403138008325328e-05,
      "loss": 3.1162,
      "step": 187400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.402817803394173e-05,
      "loss": 3.1163,
      "step": 187500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.402497598463017e-05,
      "loss": 3.1631,
      "step": 187600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.402177393531861e-05,
      "loss": 3.1404,
      "step": 187700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.401857188600705e-05,
      "loss": 3.12,
      "step": 187800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.401536983669549e-05,
      "loss": 3.1335,
      "step": 187900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4012167787383926e-05,
      "loss": 3.1196,
      "step": 188000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.8056962490081787,
      "eval_runtime": 121.4036,
      "eval_samples_per_second": 82.37,
      "eval_steps_per_second": 5.148,
      "step": 188000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4008965738072366e-05,
      "loss": 3.1147,
      "step": 188100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.4005763688760806e-05,
      "loss": 3.1432,
      "step": 188200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.400256163944925e-05,
      "loss": 3.1395,
      "step": 188300
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.399935959013769e-05,
      "loss": 3.1316,
      "step": 188400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.399615754082613e-05,
      "loss": 3.1202,
      "step": 188500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.399295549151457e-05,
      "loss": 3.1443,
      "step": 188600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.398975344220301e-05,
      "loss": 3.145,
      "step": 188700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.398655139289145e-05,
      "loss": 3.1459,
      "step": 188800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.398334934357989e-05,
      "loss": 3.1402,
      "step": 188900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.398014729426833e-05,
      "loss": 3.1268,
      "step": 189000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.803053617477417,
      "eval_runtime": 113.5737,
      "eval_samples_per_second": 88.049,
      "eval_steps_per_second": 5.503,
      "step": 189000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.397694524495678e-05,
      "loss": 3.1213,
      "step": 189100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.397374319564522e-05,
      "loss": 3.1361,
      "step": 189200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.397054114633366e-05,
      "loss": 3.14,
      "step": 189300
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3967339097022096e-05,
      "loss": 3.1375,
      "step": 189400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3964137047710536e-05,
      "loss": 3.1209,
      "step": 189500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3960934998398976e-05,
      "loss": 3.1191,
      "step": 189600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3957732949087416e-05,
      "loss": 3.1332,
      "step": 189700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3954530899775855e-05,
      "loss": 3.1214,
      "step": 189800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.39513288504643e-05,
      "loss": 3.1438,
      "step": 189900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.394812680115274e-05,
      "loss": 3.1192,
      "step": 190000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.8033854961395264,
      "eval_runtime": 111.1916,
      "eval_samples_per_second": 89.935,
      "eval_steps_per_second": 5.621,
      "step": 190000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.394492475184118e-05,
      "loss": 3.1255,
      "step": 190100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.394172270252962e-05,
      "loss": 3.112,
      "step": 190200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.393852065321806e-05,
      "loss": 3.1408,
      "step": 190300
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.39353186039065e-05,
      "loss": 3.1354,
      "step": 190400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.393211655459494e-05,
      "loss": 3.1208,
      "step": 190500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.392891450528339e-05,
      "loss": 3.1157,
      "step": 190600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3925712455971827e-05,
      "loss": 3.1495,
      "step": 190700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3922510406660266e-05,
      "loss": 3.144,
      "step": 190800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3919308357348706e-05,
      "loss": 3.1166,
      "step": 190900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3916106308037146e-05,
      "loss": 3.1428,
      "step": 191000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.8022422790527344,
      "eval_runtime": 110.4348,
      "eval_samples_per_second": 90.551,
      "eval_steps_per_second": 5.659,
      "step": 191000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3912904258725585e-05,
      "loss": 3.133,
      "step": 191100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3909702209414025e-05,
      "loss": 3.1139,
      "step": 191200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3906500160102465e-05,
      "loss": 3.0988,
      "step": 191300
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3903298110790905e-05,
      "loss": 3.0884,
      "step": 191400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.390009606147935e-05,
      "loss": 3.1004,
      "step": 191500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.389689401216779e-05,
      "loss": 3.1097,
      "step": 191600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.389369196285623e-05,
      "loss": 3.1191,
      "step": 191700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.389048991354467e-05,
      "loss": 3.1324,
      "step": 191800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.388728786423311e-05,
      "loss": 3.0946,
      "step": 191900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.388408581492155e-05,
      "loss": 3.1392,
      "step": 192000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.8026797771453857,
      "eval_runtime": 110.043,
      "eval_samples_per_second": 90.874,
      "eval_steps_per_second": 5.68,
      "step": 192000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.388088376560999e-05,
      "loss": 3.1301,
      "step": 192100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3877681716298436e-05,
      "loss": 3.1275,
      "step": 192200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3874479666986876e-05,
      "loss": 3.1328,
      "step": 192300
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3871277617675316e-05,
      "loss": 3.1282,
      "step": 192400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3868075568363755e-05,
      "loss": 3.1333,
      "step": 192500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3864873519052195e-05,
      "loss": 3.1582,
      "step": 192600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3861671469740635e-05,
      "loss": 3.1388,
      "step": 192700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3858469420429075e-05,
      "loss": 3.153,
      "step": 192800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.385526737111752e-05,
      "loss": 3.1128,
      "step": 192900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3852065321805954e-05,
      "loss": 3.1306,
      "step": 193000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.802475690841675,
      "eval_runtime": 110.0038,
      "eval_samples_per_second": 90.906,
      "eval_steps_per_second": 5.682,
      "step": 193000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.38488632724944e-05,
      "loss": 3.137,
      "step": 193100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.384566122318284e-05,
      "loss": 3.1183,
      "step": 193200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.384245917387128e-05,
      "loss": 3.1233,
      "step": 193300
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.383925712455972e-05,
      "loss": 3.1521,
      "step": 193400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.383605507524816e-05,
      "loss": 3.115,
      "step": 193500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.38328530259366e-05,
      "loss": 3.149,
      "step": 193600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.382965097662504e-05,
      "loss": 3.157,
      "step": 193700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3826448927313486e-05,
      "loss": 3.1101,
      "step": 193800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3823246878001925e-05,
      "loss": 3.1243,
      "step": 193900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3820044828690365e-05,
      "loss": 3.1095,
      "step": 194000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.8005101680755615,
      "eval_runtime": 110.3369,
      "eval_samples_per_second": 90.632,
      "eval_steps_per_second": 5.664,
      "step": 194000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3816842779378805e-05,
      "loss": 3.151,
      "step": 194100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3813640730067245e-05,
      "loss": 3.1179,
      "step": 194200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3810438680755684e-05,
      "loss": 3.1285,
      "step": 194300
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3807236631444124e-05,
      "loss": 3.1322,
      "step": 194400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.380403458213257e-05,
      "loss": 3.134,
      "step": 194500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3800832532821003e-05,
      "loss": 3.1218,
      "step": 194600
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.379763048350945e-05,
      "loss": 3.1275,
      "step": 194700
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.379442843419789e-05,
      "loss": 3.1527,
      "step": 194800
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.379122638488633e-05,
      "loss": 3.1259,
      "step": 194900
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.378802433557477e-05,
      "loss": 3.1012,
      "step": 195000
    },
    {
      "epoch": 0.12,
      "eval_loss": 2.8023297786712646,
      "eval_runtime": 110.3644,
      "eval_samples_per_second": 90.609,
      "eval_steps_per_second": 5.663,
      "step": 195000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.378482228626321e-05,
      "loss": 3.1186,
      "step": 195100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.3781620236951655e-05,
      "loss": 3.1139,
      "step": 195200
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.377841818764009e-05,
      "loss": 3.1459,
      "step": 195300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3775216138328535e-05,
      "loss": 3.1109,
      "step": 195400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3772014089016975e-05,
      "loss": 3.1189,
      "step": 195500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3768812039705414e-05,
      "loss": 3.1264,
      "step": 195600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3765609990393854e-05,
      "loss": 3.1295,
      "step": 195700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3762407941082294e-05,
      "loss": 3.1285,
      "step": 195800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3759205891770734e-05,
      "loss": 3.1206,
      "step": 195900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.375600384245917e-05,
      "loss": 3.114,
      "step": 196000
    },
    {
      "epoch": 0.13,
      "eval_loss": 2.8002207279205322,
      "eval_runtime": 119.7181,
      "eval_samples_per_second": 83.53,
      "eval_steps_per_second": 5.221,
      "step": 196000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.375280179314762e-05,
      "loss": 3.13,
      "step": 196100
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.374959974383605e-05,
      "loss": 3.1081,
      "step": 196200
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.37463976945245e-05,
      "loss": 3.1149,
      "step": 196300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.374319564521294e-05,
      "loss": 3.1397,
      "step": 196400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.373999359590138e-05,
      "loss": 3.132,
      "step": 196500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.373679154658982e-05,
      "loss": 3.0781,
      "step": 196600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.373358949727826e-05,
      "loss": 3.1119,
      "step": 196700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3730387447966705e-05,
      "loss": 3.1277,
      "step": 196800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.372718539865514e-05,
      "loss": 3.1133,
      "step": 196900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3723983349343584e-05,
      "loss": 3.1155,
      "step": 197000
    },
    {
      "epoch": 0.13,
      "eval_loss": 2.8027353286743164,
      "eval_runtime": 119.1707,
      "eval_samples_per_second": 83.913,
      "eval_steps_per_second": 5.245,
      "step": 197000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3720781300032024e-05,
      "loss": 3.1019,
      "step": 197100
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3717579250720464e-05,
      "loss": 3.1237,
      "step": 197200
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3714377201408904e-05,
      "loss": 3.1284,
      "step": 197300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.371117515209734e-05,
      "loss": 3.1229,
      "step": 197400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.370797310278579e-05,
      "loss": 3.131,
      "step": 197500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.370477105347422e-05,
      "loss": 3.125,
      "step": 197600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.370156900416267e-05,
      "loss": 3.1211,
      "step": 197700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.36983669548511e-05,
      "loss": 3.1199,
      "step": 197800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.369516490553955e-05,
      "loss": 3.1187,
      "step": 197900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.369196285622799e-05,
      "loss": 3.1201,
      "step": 198000
    },
    {
      "epoch": 0.13,
      "eval_loss": 2.798953056335449,
      "eval_runtime": 111.9286,
      "eval_samples_per_second": 89.343,
      "eval_steps_per_second": 5.584,
      "step": 198000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.368876080691643e-05,
      "loss": 3.1411,
      "step": 198100
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3685558757604875e-05,
      "loss": 3.0884,
      "step": 198200
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.368235670829331e-05,
      "loss": 3.103,
      "step": 198300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3679154658981754e-05,
      "loss": 3.0924,
      "step": 198400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.367595260967019e-05,
      "loss": 3.0902,
      "step": 198500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3672750560358634e-05,
      "loss": 3.1253,
      "step": 198600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3669548511047073e-05,
      "loss": 3.1212,
      "step": 198700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.366634646173551e-05,
      "loss": 3.0979,
      "step": 198800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.366314441242395e-05,
      "loss": 3.1014,
      "step": 198900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.365994236311239e-05,
      "loss": 3.1279,
      "step": 199000
    },
    {
      "epoch": 0.13,
      "eval_loss": 2.798889398574829,
      "eval_runtime": 112.9605,
      "eval_samples_per_second": 88.527,
      "eval_steps_per_second": 5.533,
      "step": 199000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.365674031380084e-05,
      "loss": 3.1008,
      "step": 199100
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.365353826448927e-05,
      "loss": 3.1003,
      "step": 199200
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.365033621517772e-05,
      "loss": 3.1118,
      "step": 199300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.364713416586615e-05,
      "loss": 3.1122,
      "step": 199400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.36439321165546e-05,
      "loss": 3.1433,
      "step": 199500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.364073006724304e-05,
      "loss": 3.119,
      "step": 199600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.363752801793148e-05,
      "loss": 3.114,
      "step": 199700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3634325968619924e-05,
      "loss": 3.0857,
      "step": 199800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.363112391930836e-05,
      "loss": 3.1068,
      "step": 199900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3627921869996804e-05,
      "loss": 3.1085,
      "step": 200000
    },
    {
      "epoch": 0.13,
      "eval_loss": 2.7999584674835205,
      "eval_runtime": 112.6246,
      "eval_samples_per_second": 88.791,
      "eval_steps_per_second": 5.549,
      "step": 200000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3624719820685237e-05,
      "loss": 3.0981,
      "step": 200100
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.362151777137368e-05,
      "loss": 3.0946,
      "step": 200200
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.361831572206212e-05,
      "loss": 3.0872,
      "step": 200300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.361511367275056e-05,
      "loss": 3.094,
      "step": 200400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.361191162343901e-05,
      "loss": 3.0875,
      "step": 200500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.360870957412744e-05,
      "loss": 3.1226,
      "step": 200600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.360550752481589e-05,
      "loss": 3.1084,
      "step": 200700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.360230547550432e-05,
      "loss": 3.1086,
      "step": 200800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.359910342619277e-05,
      "loss": 3.1175,
      "step": 200900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.35959013768812e-05,
      "loss": 3.0927,
      "step": 201000
    },
    {
      "epoch": 0.13,
      "eval_loss": 2.7997820377349854,
      "eval_runtime": 114.125,
      "eval_samples_per_second": 87.623,
      "eval_steps_per_second": 5.476,
      "step": 201000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.359269932756965e-05,
      "loss": 3.097,
      "step": 201100
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.358949727825809e-05,
      "loss": 3.0984,
      "step": 201200
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.358629522894653e-05,
      "loss": 3.1188,
      "step": 201300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3583093179634973e-05,
      "loss": 3.1083,
      "step": 201400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3579891130323406e-05,
      "loss": 3.1118,
      "step": 201500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.357668908101185e-05,
      "loss": 3.1201,
      "step": 201600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3573487031700286e-05,
      "loss": 3.1023,
      "step": 201700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.357028498238873e-05,
      "loss": 3.1261,
      "step": 201800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.356708293307717e-05,
      "loss": 3.108,
      "step": 201900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.356388088376561e-05,
      "loss": 3.1156,
      "step": 202000
    },
    {
      "epoch": 0.13,
      "eval_loss": 2.799975872039795,
      "eval_runtime": 110.5684,
      "eval_samples_per_second": 90.442,
      "eval_steps_per_second": 5.653,
      "step": 202000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.356067883445406e-05,
      "loss": 3.1011,
      "step": 202100
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.355747678514249e-05,
      "loss": 3.1502,
      "step": 202200
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.355427473583094e-05,
      "loss": 3.1129,
      "step": 202300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.355107268651937e-05,
      "loss": 3.1273,
      "step": 202400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.354787063720782e-05,
      "loss": 3.134,
      "step": 202500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.354466858789625e-05,
      "loss": 3.1581,
      "step": 202600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.35414665385847e-05,
      "loss": 3.1069,
      "step": 202700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3538264489273137e-05,
      "loss": 3.1055,
      "step": 202800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3535062439961576e-05,
      "loss": 3.1082,
      "step": 202900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.353186039065002e-05,
      "loss": 3.1327,
      "step": 203000
    },
    {
      "epoch": 0.13,
      "eval_loss": 2.7986254692077637,
      "eval_runtime": 119.3695,
      "eval_samples_per_second": 83.773,
      "eval_steps_per_second": 5.236,
      "step": 203000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3528658341338456e-05,
      "loss": 3.1271,
      "step": 203100
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.35254562920269e-05,
      "loss": 3.1253,
      "step": 203200
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3522254242715335e-05,
      "loss": 3.1155,
      "step": 203300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.351905219340378e-05,
      "loss": 3.1204,
      "step": 203400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.351585014409222e-05,
      "loss": 3.1448,
      "step": 203500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.351264809478066e-05,
      "loss": 3.1358,
      "step": 203600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.350944604546911e-05,
      "loss": 3.1125,
      "step": 203700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.350624399615754e-05,
      "loss": 3.1167,
      "step": 203800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.350304194684599e-05,
      "loss": 3.1398,
      "step": 203900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.349983989753442e-05,
      "loss": 3.1134,
      "step": 204000
    },
    {
      "epoch": 0.13,
      "eval_loss": 2.7976744174957275,
      "eval_runtime": 109.9744,
      "eval_samples_per_second": 90.93,
      "eval_steps_per_second": 5.683,
      "step": 204000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.349663784822287e-05,
      "loss": 3.1442,
      "step": 204100
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.34934357989113e-05,
      "loss": 3.1305,
      "step": 204200
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3490233749599746e-05,
      "loss": 3.1005,
      "step": 204300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3487031700288186e-05,
      "loss": 3.1282,
      "step": 204400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3483829650976626e-05,
      "loss": 3.1306,
      "step": 204500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.348062760166507e-05,
      "loss": 3.1251,
      "step": 204600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3477425552353505e-05,
      "loss": 3.1001,
      "step": 204700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.347422350304195e-05,
      "loss": 3.135,
      "step": 204800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3471021453730385e-05,
      "loss": 3.1109,
      "step": 204900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.346781940441883e-05,
      "loss": 3.1434,
      "step": 205000
    },
    {
      "epoch": 0.13,
      "eval_loss": 2.794476270675659,
      "eval_runtime": 110.394,
      "eval_samples_per_second": 90.585,
      "eval_steps_per_second": 5.662,
      "step": 205000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.346461735510727e-05,
      "loss": 3.137,
      "step": 205100
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.346141530579571e-05,
      "loss": 3.1485,
      "step": 205200
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.345821325648416e-05,
      "loss": 3.1424,
      "step": 205300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.345501120717259e-05,
      "loss": 3.1342,
      "step": 205400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.345180915786104e-05,
      "loss": 3.13,
      "step": 205500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.344860710854947e-05,
      "loss": 3.1307,
      "step": 205600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3445405059237916e-05,
      "loss": 3.1322,
      "step": 205700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3442203009926356e-05,
      "loss": 3.109,
      "step": 205800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3439000960614796e-05,
      "loss": 3.1427,
      "step": 205900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3435798911303235e-05,
      "loss": 3.1124,
      "step": 206000
    },
    {
      "epoch": 0.13,
      "eval_loss": 2.7924013137817383,
      "eval_runtime": 110.4734,
      "eval_samples_per_second": 90.52,
      "eval_steps_per_second": 5.657,
      "step": 206000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3432596861991675e-05,
      "loss": 3.113,
      "step": 206100
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.342939481268012e-05,
      "loss": 3.1242,
      "step": 206200
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3426192763368555e-05,
      "loss": 3.1255,
      "step": 206300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3422990714057e-05,
      "loss": 3.1319,
      "step": 206400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3419788664745434e-05,
      "loss": 3.1292,
      "step": 206500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.341658661543388e-05,
      "loss": 3.1246,
      "step": 206600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.341338456612232e-05,
      "loss": 3.1162,
      "step": 206700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.341018251681076e-05,
      "loss": 3.1372,
      "step": 206800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.34069804674992e-05,
      "loss": 3.1362,
      "step": 206900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.340377841818764e-05,
      "loss": 3.1218,
      "step": 207000
    },
    {
      "epoch": 0.13,
      "eval_loss": 2.7943174839019775,
      "eval_runtime": 110.0267,
      "eval_samples_per_second": 90.887,
      "eval_steps_per_second": 5.68,
      "step": 207000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3400576368876086e-05,
      "loss": 3.1224,
      "step": 207100
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.339737431956452e-05,
      "loss": 3.1321,
      "step": 207200
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3394172270252965e-05,
      "loss": 3.085,
      "step": 207300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3390970220941405e-05,
      "loss": 3.0981,
      "step": 207400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3387768171629845e-05,
      "loss": 3.0851,
      "step": 207500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3384566122318285e-05,
      "loss": 3.1179,
      "step": 207600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3381364073006724e-05,
      "loss": 3.129,
      "step": 207700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.337816202369517e-05,
      "loss": 3.1144,
      "step": 207800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3374959974383604e-05,
      "loss": 3.1007,
      "step": 207900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.337175792507205e-05,
      "loss": 3.0972,
      "step": 208000
    },
    {
      "epoch": 0.13,
      "eval_loss": 2.797286033630371,
      "eval_runtime": 110.4303,
      "eval_samples_per_second": 90.555,
      "eval_steps_per_second": 5.66,
      "step": 208000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.336855587576049e-05,
      "loss": 3.126,
      "step": 208100
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.336535382644893e-05,
      "loss": 3.1163,
      "step": 208200
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.336215177713737e-05,
      "loss": 3.1155,
      "step": 208300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.335894972782581e-05,
      "loss": 3.112,
      "step": 208400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.335574767851425e-05,
      "loss": 3.107,
      "step": 208500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.335254562920269e-05,
      "loss": 3.1206,
      "step": 208600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3349343579891135e-05,
      "loss": 3.1201,
      "step": 208700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.334614153057957e-05,
      "loss": 3.0891,
      "step": 208800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3342939481268015e-05,
      "loss": 3.1319,
      "step": 208900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3339737431956455e-05,
      "loss": 3.1025,
      "step": 209000
    },
    {
      "epoch": 0.13,
      "eval_loss": 2.793102264404297,
      "eval_runtime": 121.4601,
      "eval_samples_per_second": 82.332,
      "eval_steps_per_second": 5.146,
      "step": 209000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3336535382644894e-05,
      "loss": 3.1055,
      "step": 209100
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3333333333333334e-05,
      "loss": 3.0903,
      "step": 209200
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3330131284021774e-05,
      "loss": 3.1237,
      "step": 209300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.332692923471022e-05,
      "loss": 3.0961,
      "step": 209400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.332372718539865e-05,
      "loss": 3.0997,
      "step": 209500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.33205251360871e-05,
      "loss": 3.1035,
      "step": 209600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.331732308677554e-05,
      "loss": 3.1126,
      "step": 209700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.331412103746398e-05,
      "loss": 3.1086,
      "step": 209800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.331091898815242e-05,
      "loss": 3.0873,
      "step": 209900
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.330771693884086e-05,
      "loss": 3.1018,
      "step": 210000
    },
    {
      "epoch": 0.13,
      "eval_loss": 2.79235577583313,
      "eval_runtime": 115.4116,
      "eval_samples_per_second": 86.646,
      "eval_steps_per_second": 5.415,
      "step": 210000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.33045148895293e-05,
      "loss": 3.1341,
      "step": 210100
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.330131284021774e-05,
      "loss": 3.1314,
      "step": 210200
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3298110790906185e-05,
      "loss": 3.1363,
      "step": 210300
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3294908741594624e-05,
      "loss": 3.1282,
      "step": 210400
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3291706692283064e-05,
      "loss": 3.0981,
      "step": 210500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3288504642971504e-05,
      "loss": 3.1028,
      "step": 210600
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3285302593659944e-05,
      "loss": 3.1023,
      "step": 210700
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.3282100544348383e-05,
      "loss": 3.1313,
      "step": 210800
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.327889849503682e-05,
      "loss": 3.119,
      "step": 210900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.327569644572527e-05,
      "loss": 3.1018,
      "step": 211000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.7935893535614014,
      "eval_runtime": 113.8258,
      "eval_samples_per_second": 87.854,
      "eval_steps_per_second": 5.491,
      "step": 211000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.32724943964137e-05,
      "loss": 3.1242,
      "step": 211100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.326929234710215e-05,
      "loss": 3.1006,
      "step": 211200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.326609029779059e-05,
      "loss": 3.1017,
      "step": 211300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.326288824847903e-05,
      "loss": 3.1312,
      "step": 211400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.325968619916747e-05,
      "loss": 3.0932,
      "step": 211500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.325648414985591e-05,
      "loss": 3.1131,
      "step": 211600
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.325328210054435e-05,
      "loss": 3.1029,
      "step": 211700
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.325008005123279e-05,
      "loss": 3.0976,
      "step": 211800
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3246878001921234e-05,
      "loss": 3.1108,
      "step": 211900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3243675952609674e-05,
      "loss": 3.0845,
      "step": 212000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.7957663536071777,
      "eval_runtime": 110.001,
      "eval_samples_per_second": 90.908,
      "eval_steps_per_second": 5.682,
      "step": 212000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3240473903298114e-05,
      "loss": 3.1048,
      "step": 212100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.323727185398655e-05,
      "loss": 3.1106,
      "step": 212200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.323406980467499e-05,
      "loss": 3.1036,
      "step": 212300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.323086775536343e-05,
      "loss": 3.0942,
      "step": 212400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.322766570605187e-05,
      "loss": 3.1277,
      "step": 212500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.322446365674032e-05,
      "loss": 3.1087,
      "step": 212600
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.322126160742876e-05,
      "loss": 3.105,
      "step": 212700
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.32180595581172e-05,
      "loss": 3.1065,
      "step": 212800
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.321485750880564e-05,
      "loss": 3.1164,
      "step": 212900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.321165545949408e-05,
      "loss": 3.0876,
      "step": 213000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.7949841022491455,
      "eval_runtime": 109.7792,
      "eval_samples_per_second": 91.092,
      "eval_steps_per_second": 5.693,
      "step": 213000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.320845341018252e-05,
      "loss": 3.1091,
      "step": 213100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.320525136087096e-05,
      "loss": 3.1291,
      "step": 213200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.32020493115594e-05,
      "loss": 3.0914,
      "step": 213300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.319884726224784e-05,
      "loss": 3.1021,
      "step": 213400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3195645212936283e-05,
      "loss": 3.1048,
      "step": 213500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.319244316362472e-05,
      "loss": 3.1175,
      "step": 213600
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.318924111431316e-05,
      "loss": 3.1311,
      "step": 213700
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.31860390650016e-05,
      "loss": 3.1291,
      "step": 213800
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.318283701569004e-05,
      "loss": 3.1161,
      "step": 213900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.317963496637848e-05,
      "loss": 3.136,
      "step": 214000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.788466691970825,
      "eval_runtime": 119.3192,
      "eval_samples_per_second": 83.809,
      "eval_steps_per_second": 5.238,
      "step": 214000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.317643291706692e-05,
      "loss": 3.1089,
      "step": 214100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.317323086775537e-05,
      "loss": 3.1134,
      "step": 214200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.317002881844381e-05,
      "loss": 3.1292,
      "step": 214300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.316682676913225e-05,
      "loss": 3.1087,
      "step": 214400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.316362471982069e-05,
      "loss": 3.1023,
      "step": 214500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.316042267050913e-05,
      "loss": 3.1413,
      "step": 214600
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.315722062119757e-05,
      "loss": 3.1096,
      "step": 214700
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.315401857188601e-05,
      "loss": 3.1052,
      "step": 214800
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3150816522574447e-05,
      "loss": 3.0737,
      "step": 214900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.314761447326289e-05,
      "loss": 3.1331,
      "step": 215000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.789860486984253,
      "eval_runtime": 110.2208,
      "eval_samples_per_second": 90.727,
      "eval_steps_per_second": 5.67,
      "step": 215000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.314441242395133e-05,
      "loss": 3.116,
      "step": 215100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.314121037463977e-05,
      "loss": 3.1044,
      "step": 215200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.313800832532821e-05,
      "loss": 3.1264,
      "step": 215300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.313480627601665e-05,
      "loss": 3.0941,
      "step": 215400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.313160422670509e-05,
      "loss": 3.1086,
      "step": 215500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.312840217739353e-05,
      "loss": 3.108,
      "step": 215600
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.312520012808198e-05,
      "loss": 3.1069,
      "step": 215700
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.312199807877042e-05,
      "loss": 3.099,
      "step": 215800
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.311879602945886e-05,
      "loss": 3.0967,
      "step": 215900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.31155939801473e-05,
      "loss": 3.1297,
      "step": 216000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.7900583744049072,
      "eval_runtime": 109.5579,
      "eval_samples_per_second": 91.276,
      "eval_steps_per_second": 5.705,
      "step": 216000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.311239193083574e-05,
      "loss": 3.098,
      "step": 216100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.310918988152418e-05,
      "loss": 3.1091,
      "step": 216200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3105987832212616e-05,
      "loss": 3.0902,
      "step": 216300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3102785782901056e-05,
      "loss": 3.0866,
      "step": 216400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3099583733589496e-05,
      "loss": 3.0902,
      "step": 216500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.309638168427794e-05,
      "loss": 3.1135,
      "step": 216600
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.309317963496638e-05,
      "loss": 3.1005,
      "step": 216700
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.308997758565482e-05,
      "loss": 3.0724,
      "step": 216800
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.308677553634326e-05,
      "loss": 3.1153,
      "step": 216900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.30835734870317e-05,
      "loss": 3.0814,
      "step": 217000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.78932785987854,
      "eval_runtime": 110.1861,
      "eval_samples_per_second": 90.756,
      "eval_steps_per_second": 5.672,
      "step": 217000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.308037143772014e-05,
      "loss": 3.1078,
      "step": 217100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.307716938840858e-05,
      "loss": 3.0867,
      "step": 217200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.307396733909703e-05,
      "loss": 3.0843,
      "step": 217300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.307076528978547e-05,
      "loss": 3.1028,
      "step": 217400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.306756324047391e-05,
      "loss": 3.1049,
      "step": 217500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.306436119116235e-05,
      "loss": 3.1018,
      "step": 217600
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3061159141850786e-05,
      "loss": 3.1165,
      "step": 217700
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3057957092539226e-05,
      "loss": 3.0869,
      "step": 217800
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3054755043227666e-05,
      "loss": 3.113,
      "step": 217900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.305155299391611e-05,
      "loss": 3.1098,
      "step": 218000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.7864022254943848,
      "eval_runtime": 110.6798,
      "eval_samples_per_second": 90.351,
      "eval_steps_per_second": 5.647,
      "step": 218000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3048350944604545e-05,
      "loss": 3.0872,
      "step": 218100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.304514889529299e-05,
      "loss": 3.0896,
      "step": 218200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.304194684598143e-05,
      "loss": 3.1187,
      "step": 218300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.303874479666987e-05,
      "loss": 3.1225,
      "step": 218400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.303554274735831e-05,
      "loss": 3.0807,
      "step": 218500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.303234069804675e-05,
      "loss": 3.0801,
      "step": 218600
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.302913864873519e-05,
      "loss": 3.0818,
      "step": 218700
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.302593659942363e-05,
      "loss": 3.1022,
      "step": 218800
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.302273455011208e-05,
      "loss": 3.0806,
      "step": 218900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3019532500800517e-05,
      "loss": 3.085,
      "step": 219000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.788654088973999,
      "eval_runtime": 121.6739,
      "eval_samples_per_second": 82.187,
      "eval_steps_per_second": 5.137,
      "step": 219000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3016330451488956e-05,
      "loss": 3.101,
      "step": 219100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3013128402177396e-05,
      "loss": 3.0997,
      "step": 219200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3009926352865836e-05,
      "loss": 3.1002,
      "step": 219300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3006724303554275e-05,
      "loss": 3.0771,
      "step": 219400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.3003522254242715e-05,
      "loss": 3.0991,
      "step": 219500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.300032020493116e-05,
      "loss": 3.0989,
      "step": 219600
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2997118155619595e-05,
      "loss": 3.0933,
      "step": 219700
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.299391610630804e-05,
      "loss": 3.0981,
      "step": 219800
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.299071405699648e-05,
      "loss": 3.1192,
      "step": 219900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.298751200768492e-05,
      "loss": 3.104,
      "step": 220000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.7837576866149902,
      "eval_runtime": 110.8536,
      "eval_samples_per_second": 90.209,
      "eval_steps_per_second": 5.638,
      "step": 220000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.298430995837336e-05,
      "loss": 3.069,
      "step": 220100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.29811079090618e-05,
      "loss": 3.1071,
      "step": 220200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.297790585975025e-05,
      "loss": 3.0767,
      "step": 220300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.297470381043868e-05,
      "loss": 3.0996,
      "step": 220400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2971501761127126e-05,
      "loss": 3.0869,
      "step": 220500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2968299711815566e-05,
      "loss": 3.0944,
      "step": 220600
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2965097662504006e-05,
      "loss": 3.0969,
      "step": 220700
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2961895613192445e-05,
      "loss": 3.1229,
      "step": 220800
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2958693563880885e-05,
      "loss": 3.0821,
      "step": 220900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2955491514569325e-05,
      "loss": 3.0697,
      "step": 221000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.7865850925445557,
      "eval_runtime": 110.7156,
      "eval_samples_per_second": 90.322,
      "eval_steps_per_second": 5.645,
      "step": 221000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2952289465257765e-05,
      "loss": 3.1024,
      "step": 221100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.294908741594621e-05,
      "loss": 3.0659,
      "step": 221200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2945885366634644e-05,
      "loss": 3.0848,
      "step": 221300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.294268331732309e-05,
      "loss": 3.1029,
      "step": 221400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.293948126801153e-05,
      "loss": 3.061,
      "step": 221500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.293627921869997e-05,
      "loss": 3.103,
      "step": 221600
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.293307716938841e-05,
      "loss": 3.1015,
      "step": 221700
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.292987512007685e-05,
      "loss": 3.0894,
      "step": 221800
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2926673070765296e-05,
      "loss": 3.0768,
      "step": 221900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.292347102145373e-05,
      "loss": 3.1045,
      "step": 222000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.784503936767578,
      "eval_runtime": 110.0127,
      "eval_samples_per_second": 90.899,
      "eval_steps_per_second": 5.681,
      "step": 222000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2920268972142176e-05,
      "loss": 3.1015,
      "step": 222100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2917066922830615e-05,
      "loss": 3.0867,
      "step": 222200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2913864873519055e-05,
      "loss": 3.1176,
      "step": 222300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2910662824207495e-05,
      "loss": 3.0938,
      "step": 222400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2907460774895934e-05,
      "loss": 3.0786,
      "step": 222500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.290425872558438e-05,
      "loss": 3.1032,
      "step": 222600
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2901056676272814e-05,
      "loss": 3.1148,
      "step": 222700
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.289785462696126e-05,
      "loss": 3.1046,
      "step": 222800
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2894652577649693e-05,
      "loss": 3.1105,
      "step": 222900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.289145052833814e-05,
      "loss": 3.1111,
      "step": 223000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.7858753204345703,
      "eval_runtime": 119.5709,
      "eval_samples_per_second": 83.632,
      "eval_steps_per_second": 5.227,
      "step": 223000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.288824847902658e-05,
      "loss": 3.119,
      "step": 223100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.288504642971502e-05,
      "loss": 3.0926,
      "step": 223200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.288184438040346e-05,
      "loss": 3.0808,
      "step": 223300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.28786423310919e-05,
      "loss": 3.0936,
      "step": 223400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2875440281780345e-05,
      "loss": 3.1083,
      "step": 223500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.287223823246878e-05,
      "loss": 3.1343,
      "step": 223600
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2869036183157225e-05,
      "loss": 3.1111,
      "step": 223700
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2865834133845665e-05,
      "loss": 3.1033,
      "step": 223800
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2862632084534104e-05,
      "loss": 3.1016,
      "step": 223900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2859430035222544e-05,
      "loss": 3.0651,
      "step": 224000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.7820630073547363,
      "eval_runtime": 109.5931,
      "eval_samples_per_second": 91.247,
      "eval_steps_per_second": 5.703,
      "step": 224000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2856227985910984e-05,
      "loss": 3.0763,
      "step": 224100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.285302593659943e-05,
      "loss": 3.0697,
      "step": 224200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.284982388728786e-05,
      "loss": 3.0985,
      "step": 224300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.284662183797631e-05,
      "loss": 3.1069,
      "step": 224400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.284341978866474e-05,
      "loss": 3.0632,
      "step": 224500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.284021773935319e-05,
      "loss": 3.1002,
      "step": 224600
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.283701569004163e-05,
      "loss": 3.1321,
      "step": 224700
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.283381364073007e-05,
      "loss": 3.1193,
      "step": 224800
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2830611591418515e-05,
      "loss": 3.1156,
      "step": 224900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.282740954210695e-05,
      "loss": 3.0849,
      "step": 225000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.784259796142578,
      "eval_runtime": 109.6481,
      "eval_samples_per_second": 91.201,
      "eval_steps_per_second": 5.7,
      "step": 225000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2824207492795395e-05,
      "loss": 3.0895,
      "step": 225100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.282100544348383e-05,
      "loss": 3.109,
      "step": 225200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2817803394172274e-05,
      "loss": 3.0939,
      "step": 225300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2814601344860714e-05,
      "loss": 3.1146,
      "step": 225400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2811399295549154e-05,
      "loss": 3.0925,
      "step": 225500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2808197246237594e-05,
      "loss": 3.0947,
      "step": 225600
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.280499519692603e-05,
      "loss": 3.0955,
      "step": 225700
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.280179314761448e-05,
      "loss": 3.081,
      "step": 225800
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.279859109830291e-05,
      "loss": 3.0802,
      "step": 225900
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.279538904899136e-05,
      "loss": 3.0809,
      "step": 226000
    },
    {
      "epoch": 0.14,
      "eval_loss": 2.784475088119507,
      "eval_runtime": 109.5186,
      "eval_samples_per_second": 91.309,
      "eval_steps_per_second": 5.707,
      "step": 226000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.279218699967979e-05,
      "loss": 3.0801,
      "step": 226100
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.278898495036824e-05,
      "loss": 3.1187,
      "step": 226200
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.278578290105668e-05,
      "loss": 3.1122,
      "step": 226300
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.278258085174512e-05,
      "loss": 3.0924,
      "step": 226400
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.2779378802433565e-05,
      "loss": 3.1036,
      "step": 226500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2776176753122e-05,
      "loss": 3.1057,
      "step": 226600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2772974703810444e-05,
      "loss": 3.1151,
      "step": 226700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.276977265449888e-05,
      "loss": 3.0881,
      "step": 226800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2766570605187324e-05,
      "loss": 3.1117,
      "step": 226900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2763368555875763e-05,
      "loss": 3.1164,
      "step": 227000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.7834014892578125,
      "eval_runtime": 119.8079,
      "eval_samples_per_second": 83.467,
      "eval_steps_per_second": 5.217,
      "step": 227000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.27601665065642e-05,
      "loss": 3.0966,
      "step": 227100
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.275696445725264e-05,
      "loss": 3.1124,
      "step": 227200
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.275376240794108e-05,
      "loss": 3.1291,
      "step": 227300
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.275056035862953e-05,
      "loss": 3.1148,
      "step": 227400
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.274735830931796e-05,
      "loss": 3.1048,
      "step": 227500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.274415626000641e-05,
      "loss": 3.0991,
      "step": 227600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.274095421069484e-05,
      "loss": 3.0997,
      "step": 227700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.273775216138329e-05,
      "loss": 3.1064,
      "step": 227800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.273455011207173e-05,
      "loss": 3.0857,
      "step": 227900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.273134806276017e-05,
      "loss": 3.1108,
      "step": 228000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.7827634811401367,
      "eval_runtime": 110.0925,
      "eval_samples_per_second": 90.833,
      "eval_steps_per_second": 5.677,
      "step": 228000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2728146013448614e-05,
      "loss": 3.1086,
      "step": 228100
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.272494396413705e-05,
      "loss": 3.1077,
      "step": 228200
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2721741914825494e-05,
      "loss": 3.09,
      "step": 228300
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2718539865513927e-05,
      "loss": 3.107,
      "step": 228400
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.271533781620237e-05,
      "loss": 3.0878,
      "step": 228500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.271213576689081e-05,
      "loss": 3.1096,
      "step": 228600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.270893371757925e-05,
      "loss": 3.1022,
      "step": 228700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.270573166826769e-05,
      "loss": 3.1051,
      "step": 228800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.270252961895613e-05,
      "loss": 3.1213,
      "step": 228900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.269932756964458e-05,
      "loss": 3.0758,
      "step": 229000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.7838051319122314,
      "eval_runtime": 110.9594,
      "eval_samples_per_second": 90.123,
      "eval_steps_per_second": 5.633,
      "step": 229000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.269612552033301e-05,
      "loss": 3.112,
      "step": 229100
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.269292347102146e-05,
      "loss": 3.0828,
      "step": 229200
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.268972142170989e-05,
      "loss": 3.1128,
      "step": 229300
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.268651937239834e-05,
      "loss": 3.1025,
      "step": 229400
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.268331732308678e-05,
      "loss": 3.0702,
      "step": 229500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.268011527377522e-05,
      "loss": 3.117,
      "step": 229600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2676913224463663e-05,
      "loss": 3.0803,
      "step": 229700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2673711175152096e-05,
      "loss": 3.0958,
      "step": 229800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.267050912584054e-05,
      "loss": 3.0651,
      "step": 229900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2667307076528976e-05,
      "loss": 3.0675,
      "step": 230000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.7814781665802,
      "eval_runtime": 112.7123,
      "eval_samples_per_second": 88.721,
      "eval_steps_per_second": 5.545,
      "step": 230000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.266410502721742e-05,
      "loss": 3.0634,
      "step": 230100
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.266090297790586e-05,
      "loss": 3.0603,
      "step": 230200
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.26577009285943e-05,
      "loss": 3.0977,
      "step": 230300
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.265449887928274e-05,
      "loss": 3.1038,
      "step": 230400
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.265129682997118e-05,
      "loss": 3.078,
      "step": 230500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.264809478065963e-05,
      "loss": 3.0923,
      "step": 230600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.264489273134806e-05,
      "loss": 3.113,
      "step": 230700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.264169068203651e-05,
      "loss": 3.0834,
      "step": 230800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.263848863272494e-05,
      "loss": 3.1157,
      "step": 230900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.263528658341339e-05,
      "loss": 3.0996,
      "step": 231000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.7801432609558105,
      "eval_runtime": 120.6608,
      "eval_samples_per_second": 82.877,
      "eval_steps_per_second": 5.18,
      "step": 231000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2632084534101827e-05,
      "loss": 3.0784,
      "step": 231100
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2628882484790266e-05,
      "loss": 3.0897,
      "step": 231200
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.262568043547871e-05,
      "loss": 3.1042,
      "step": 231300
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2622478386167146e-05,
      "loss": 3.0879,
      "step": 231400
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.261927633685559e-05,
      "loss": 3.0753,
      "step": 231500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2616074287544025e-05,
      "loss": 3.0893,
      "step": 231600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.261287223823247e-05,
      "loss": 3.112,
      "step": 231700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.260967018892091e-05,
      "loss": 3.08,
      "step": 231800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.260646813960935e-05,
      "loss": 3.0851,
      "step": 231900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.260326609029779e-05,
      "loss": 3.0784,
      "step": 232000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.7841458320617676,
      "eval_runtime": 109.8774,
      "eval_samples_per_second": 91.01,
      "eval_steps_per_second": 5.688,
      "step": 232000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.260006404098623e-05,
      "loss": 3.082,
      "step": 232100
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.259686199167468e-05,
      "loss": 3.1056,
      "step": 232200
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.259365994236311e-05,
      "loss": 3.0688,
      "step": 232300
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.259045789305156e-05,
      "loss": 3.096,
      "step": 232400
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2587255843739996e-05,
      "loss": 3.0891,
      "step": 232500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2584053794428436e-05,
      "loss": 3.1027,
      "step": 232600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2580851745116876e-05,
      "loss": 3.0804,
      "step": 232700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2577649695805316e-05,
      "loss": 3.0886,
      "step": 232800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.257444764649376e-05,
      "loss": 3.09,
      "step": 232900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2571245597182195e-05,
      "loss": 3.0739,
      "step": 233000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.7793285846710205,
      "eval_runtime": 109.8648,
      "eval_samples_per_second": 91.021,
      "eval_steps_per_second": 5.689,
      "step": 233000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.256804354787064e-05,
      "loss": 3.0917,
      "step": 233100
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2564841498559075e-05,
      "loss": 3.0847,
      "step": 233200
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.256163944924752e-05,
      "loss": 3.0699,
      "step": 233300
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.255843739993596e-05,
      "loss": 3.0919,
      "step": 233400
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.25552353506244e-05,
      "loss": 3.0811,
      "step": 233500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.255203330131284e-05,
      "loss": 3.0835,
      "step": 233600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.254883125200128e-05,
      "loss": 3.0569,
      "step": 233700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.254562920268973e-05,
      "loss": 3.0731,
      "step": 233800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.254242715337816e-05,
      "loss": 3.09,
      "step": 233900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2539225104066606e-05,
      "loss": 3.0876,
      "step": 234000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.781632423400879,
      "eval_runtime": 109.8157,
      "eval_samples_per_second": 91.062,
      "eval_steps_per_second": 5.691,
      "step": 234000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2536023054755046e-05,
      "loss": 3.0781,
      "step": 234100
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2532821005443486e-05,
      "loss": 3.1168,
      "step": 234200
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2529618956131925e-05,
      "loss": 3.087,
      "step": 234300
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2526416906820365e-05,
      "loss": 3.1023,
      "step": 234400
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.252321485750881e-05,
      "loss": 3.1016,
      "step": 234500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2520012808197245e-05,
      "loss": 3.0846,
      "step": 234600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.251681075888569e-05,
      "loss": 3.1051,
      "step": 234700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.251360870957413e-05,
      "loss": 3.0995,
      "step": 234800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.251040666026257e-05,
      "loss": 3.0877,
      "step": 234900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.250720461095101e-05,
      "loss": 3.11,
      "step": 235000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.7791457176208496,
      "eval_runtime": 122.3962,
      "eval_samples_per_second": 81.702,
      "eval_steps_per_second": 5.106,
      "step": 235000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.250400256163945e-05,
      "loss": 3.087,
      "step": 235100
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.250080051232789e-05,
      "loss": 3.0965,
      "step": 235200
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.249759846301633e-05,
      "loss": 3.1011,
      "step": 235300
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2494396413704776e-05,
      "loss": 3.0649,
      "step": 235400
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2491194364393216e-05,
      "loss": 3.0989,
      "step": 235500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2487992315081655e-05,
      "loss": 3.1191,
      "step": 235600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2484790265770095e-05,
      "loss": 3.0717,
      "step": 235700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2481588216458535e-05,
      "loss": 3.1019,
      "step": 235800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2478386167146975e-05,
      "loss": 3.1053,
      "step": 235900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2475184117835414e-05,
      "loss": 3.0859,
      "step": 236000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.7787585258483887,
      "eval_runtime": 109.8949,
      "eval_samples_per_second": 90.996,
      "eval_steps_per_second": 5.687,
      "step": 236000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.247198206852386e-05,
      "loss": 3.1064,
      "step": 236100
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2468780019212294e-05,
      "loss": 3.135,
      "step": 236200
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.246557796990074e-05,
      "loss": 3.0873,
      "step": 236300
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.246237592058918e-05,
      "loss": 3.0917,
      "step": 236400
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.245917387127762e-05,
      "loss": 3.0871,
      "step": 236500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.245597182196606e-05,
      "loss": 3.1092,
      "step": 236600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.24527697726545e-05,
      "loss": 3.0813,
      "step": 236700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.244956772334294e-05,
      "loss": 3.0859,
      "step": 236800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.244636567403138e-05,
      "loss": 3.0854,
      "step": 236900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2443163624719825e-05,
      "loss": 3.0789,
      "step": 237000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.77862811088562,
      "eval_runtime": 110.1081,
      "eval_samples_per_second": 90.82,
      "eval_steps_per_second": 5.676,
      "step": 237000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2439961575408265e-05,
      "loss": 3.1221,
      "step": 237100
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2436759526096705e-05,
      "loss": 3.0951,
      "step": 237200
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2433557476785145e-05,
      "loss": 3.0983,
      "step": 237300
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2430355427473584e-05,
      "loss": 3.0714,
      "step": 237400
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2427153378162024e-05,
      "loss": 3.075,
      "step": 237500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2423951328850464e-05,
      "loss": 3.075,
      "step": 237600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.242074927953891e-05,
      "loss": 3.0681,
      "step": 237700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.241754723022735e-05,
      "loss": 3.0772,
      "step": 237800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.241434518091579e-05,
      "loss": 3.0937,
      "step": 237900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.241114313160423e-05,
      "loss": 3.0738,
      "step": 238000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.7760043144226074,
      "eval_runtime": 120.7961,
      "eval_samples_per_second": 82.784,
      "eval_steps_per_second": 5.174,
      "step": 238000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.240794108229267e-05,
      "loss": 3.0879,
      "step": 238100
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.240473903298111e-05,
      "loss": 3.1005,
      "step": 238200
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.240153698366955e-05,
      "loss": 3.0961,
      "step": 238300
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.239833493435799e-05,
      "loss": 3.0844,
      "step": 238400
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.239513288504643e-05,
      "loss": 3.097,
      "step": 238500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2391930835734875e-05,
      "loss": 3.0939,
      "step": 238600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2388728786423314e-05,
      "loss": 3.0917,
      "step": 238700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2385526737111754e-05,
      "loss": 3.0959,
      "step": 238800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2382324687800194e-05,
      "loss": 3.0839,
      "step": 238900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2379122638488634e-05,
      "loss": 3.1071,
      "step": 239000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.7748935222625732,
      "eval_runtime": 110.5795,
      "eval_samples_per_second": 90.433,
      "eval_steps_per_second": 5.652,
      "step": 239000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2375920589177073e-05,
      "loss": 3.0841,
      "step": 239100
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.237271853986551e-05,
      "loss": 3.0847,
      "step": 239200
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.236951649055396e-05,
      "loss": 3.0938,
      "step": 239300
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.23663144412424e-05,
      "loss": 3.0936,
      "step": 239400
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.236311239193084e-05,
      "loss": 3.1039,
      "step": 239500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.235991034261928e-05,
      "loss": 3.0832,
      "step": 239600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.235670829330772e-05,
      "loss": 3.0537,
      "step": 239700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.235350624399616e-05,
      "loss": 3.0698,
      "step": 239800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.23503041946846e-05,
      "loss": 3.055,
      "step": 239900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.234710214537304e-05,
      "loss": 3.0886,
      "step": 240000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.774780750274658,
      "eval_runtime": 111.1768,
      "eval_samples_per_second": 89.947,
      "eval_steps_per_second": 5.622,
      "step": 240000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2343900096061484e-05,
      "loss": 3.0701,
      "step": 240100
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2340698046749924e-05,
      "loss": 3.0871,
      "step": 240200
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2337495997438364e-05,
      "loss": 3.0648,
      "step": 240300
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2334293948126804e-05,
      "loss": 3.0863,
      "step": 240400
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.233109189881524e-05,
      "loss": 3.0448,
      "step": 240500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.232788984950368e-05,
      "loss": 3.0685,
      "step": 240600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.232468780019212e-05,
      "loss": 3.0728,
      "step": 240700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.232148575088056e-05,
      "loss": 3.0998,
      "step": 240800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.231828370156901e-05,
      "loss": 3.0701,
      "step": 240900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.231508165225745e-05,
      "loss": 3.0606,
      "step": 241000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.773284435272217,
      "eval_runtime": 109.6006,
      "eval_samples_per_second": 91.24,
      "eval_steps_per_second": 5.703,
      "step": 241000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.231187960294589e-05,
      "loss": 3.0987,
      "step": 241100
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.230867755363433e-05,
      "loss": 3.0383,
      "step": 241200
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.230547550432277e-05,
      "loss": 3.1018,
      "step": 241300
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.230227345501121e-05,
      "loss": 3.0732,
      "step": 241400
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.229907140569965e-05,
      "loss": 3.0766,
      "step": 241500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.229586935638809e-05,
      "loss": 3.0952,
      "step": 241600
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2292667307076534e-05,
      "loss": 3.1008,
      "step": 241700
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.2289465257764973e-05,
      "loss": 3.1106,
      "step": 241800
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.228626320845341e-05,
      "loss": 3.1022,
      "step": 241900
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.228306115914185e-05,
      "loss": 3.0922,
      "step": 242000
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.7724921703338623,
      "eval_runtime": 110.7079,
      "eval_samples_per_second": 90.328,
      "eval_steps_per_second": 5.645,
      "step": 242000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.227985910983029e-05,
      "loss": 3.1024,
      "step": 242100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.227665706051873e-05,
      "loss": 3.1135,
      "step": 242200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.227345501120717e-05,
      "loss": 3.092,
      "step": 242300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.227025296189562e-05,
      "loss": 3.0827,
      "step": 242400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.226705091258406e-05,
      "loss": 3.1278,
      "step": 242500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.22638488632725e-05,
      "loss": 3.0799,
      "step": 242600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.226064681396094e-05,
      "loss": 3.096,
      "step": 242700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.225744476464938e-05,
      "loss": 3.0866,
      "step": 242800
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.225424271533782e-05,
      "loss": 3.0912,
      "step": 242900
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.225104066602626e-05,
      "loss": 3.0968,
      "step": 243000
    },
    {
      "epoch": 0.16,
      "eval_loss": 2.773735523223877,
      "eval_runtime": 111.6694,
      "eval_samples_per_second": 89.55,
      "eval_steps_per_second": 5.597,
      "step": 243000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.22478386167147e-05,
      "loss": 3.0755,
      "step": 243100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2244636567403137e-05,
      "loss": 3.0865,
      "step": 243200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.224143451809158e-05,
      "loss": 3.0766,
      "step": 243300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.223823246878002e-05,
      "loss": 3.1025,
      "step": 243400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.223503041946846e-05,
      "loss": 3.0903,
      "step": 243500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.22318283701569e-05,
      "loss": 3.0995,
      "step": 243600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.222862632084534e-05,
      "loss": 3.0912,
      "step": 243700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.222542427153378e-05,
      "loss": 3.1035,
      "step": 243800
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.222222222222222e-05,
      "loss": 3.0917,
      "step": 243900
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.221902017291067e-05,
      "loss": 3.0901,
      "step": 244000
    },
    {
      "epoch": 0.16,
      "eval_loss": 2.7716867923736572,
      "eval_runtime": 113.8161,
      "eval_samples_per_second": 87.861,
      "eval_steps_per_second": 5.491,
      "step": 244000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.221581812359911e-05,
      "loss": 3.1139,
      "step": 244100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.221261607428755e-05,
      "loss": 3.1125,
      "step": 244200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.220941402497599e-05,
      "loss": 3.1117,
      "step": 244300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.220621197566443e-05,
      "loss": 3.0904,
      "step": 244400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.220300992635287e-05,
      "loss": 3.0995,
      "step": 244500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2199807877041306e-05,
      "loss": 3.0834,
      "step": 244600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.219660582772975e-05,
      "loss": 3.1031,
      "step": 244700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2193403778418186e-05,
      "loss": 3.0884,
      "step": 244800
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.219020172910663e-05,
      "loss": 3.0606,
      "step": 244900
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.218699967979507e-05,
      "loss": 3.0772,
      "step": 245000
    },
    {
      "epoch": 0.16,
      "eval_loss": 2.770691394805908,
      "eval_runtime": 121.8482,
      "eval_samples_per_second": 82.069,
      "eval_steps_per_second": 5.129,
      "step": 245000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.218379763048351e-05,
      "loss": 3.0863,
      "step": 245100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.218059558117195e-05,
      "loss": 3.0684,
      "step": 245200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.217739353186039e-05,
      "loss": 3.0751,
      "step": 245300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.217419148254884e-05,
      "loss": 3.0841,
      "step": 245400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.217098943323727e-05,
      "loss": 3.0928,
      "step": 245500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.216778738392572e-05,
      "loss": 3.0602,
      "step": 245600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.216458533461416e-05,
      "loss": 3.0816,
      "step": 245700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.21613832853026e-05,
      "loss": 3.1112,
      "step": 245800
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.215818123599104e-05,
      "loss": 3.067,
      "step": 245900
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2154979186679476e-05,
      "loss": 3.0698,
      "step": 246000
    },
    {
      "epoch": 0.16,
      "eval_loss": 2.775249719619751,
      "eval_runtime": 110.2182,
      "eval_samples_per_second": 90.729,
      "eval_steps_per_second": 5.671,
      "step": 246000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2151777137367916e-05,
      "loss": 3.0675,
      "step": 246100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2148575088056356e-05,
      "loss": 3.0749,
      "step": 246200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.21453730387448e-05,
      "loss": 3.0867,
      "step": 246300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2142170989433235e-05,
      "loss": 3.062,
      "step": 246400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.213896894012168e-05,
      "loss": 3.0553,
      "step": 246500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.213576689081012e-05,
      "loss": 3.0865,
      "step": 246600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.213256484149856e-05,
      "loss": 3.0494,
      "step": 246700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2129362792187e-05,
      "loss": 3.0968,
      "step": 246800
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.212616074287544e-05,
      "loss": 3.0885,
      "step": 246900
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.212295869356389e-05,
      "loss": 3.0624,
      "step": 247000
    },
    {
      "epoch": 0.16,
      "eval_loss": 2.7725865840911865,
      "eval_runtime": 110.9147,
      "eval_samples_per_second": 90.159,
      "eval_steps_per_second": 5.635,
      "step": 247000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.211975664425232e-05,
      "loss": 3.0616,
      "step": 247100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.211655459494077e-05,
      "loss": 3.0724,
      "step": 247200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2113352545629207e-05,
      "loss": 3.085,
      "step": 247300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2110150496317646e-05,
      "loss": 3.0896,
      "step": 247400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2106948447006086e-05,
      "loss": 3.1019,
      "step": 247500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2103746397694526e-05,
      "loss": 3.065,
      "step": 247600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.210054434838297e-05,
      "loss": 3.0713,
      "step": 247700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2097342299071405e-05,
      "loss": 3.0656,
      "step": 247800
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.209414024975985e-05,
      "loss": 3.0475,
      "step": 247900
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2090938200448285e-05,
      "loss": 3.1009,
      "step": 248000
    },
    {
      "epoch": 0.16,
      "eval_loss": 2.7715604305267334,
      "eval_runtime": 110.3299,
      "eval_samples_per_second": 90.637,
      "eval_steps_per_second": 5.665,
      "step": 248000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.208773615113673e-05,
      "loss": 3.0641,
      "step": 248100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.208453410182517e-05,
      "loss": 3.0796,
      "step": 248200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.208133205251361e-05,
      "loss": 3.1047,
      "step": 248300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.207813000320205e-05,
      "loss": 3.09,
      "step": 248400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.207492795389049e-05,
      "loss": 3.0832,
      "step": 248500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.207172590457894e-05,
      "loss": 3.0763,
      "step": 248600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.206852385526737e-05,
      "loss": 3.0495,
      "step": 248700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2065321805955816e-05,
      "loss": 3.0677,
      "step": 248800
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2062119756644256e-05,
      "loss": 3.0503,
      "step": 248900
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2058917707332696e-05,
      "loss": 3.061,
      "step": 249000
    },
    {
      "epoch": 0.16,
      "eval_loss": 2.772599220275879,
      "eval_runtime": 110.155,
      "eval_samples_per_second": 90.781,
      "eval_steps_per_second": 5.674,
      "step": 249000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2055715658021135e-05,
      "loss": 3.0842,
      "step": 249100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2052513608709575e-05,
      "loss": 3.0522,
      "step": 249200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.204931155939802e-05,
      "loss": 3.0864,
      "step": 249300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2046109510086455e-05,
      "loss": 3.0619,
      "step": 249400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.20429074607749e-05,
      "loss": 3.0798,
      "step": 249500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2039705411463334e-05,
      "loss": 3.0825,
      "step": 249600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.203650336215178e-05,
      "loss": 3.0718,
      "step": 249700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.203330131284022e-05,
      "loss": 3.0977,
      "step": 249800
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.203009926352866e-05,
      "loss": 3.0679,
      "step": 249900
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2026897214217107e-05,
      "loss": 3.0528,
      "step": 250000
    },
    {
      "epoch": 0.16,
      "eval_loss": 2.7718098163604736,
      "eval_runtime": 109.7514,
      "eval_samples_per_second": 91.115,
      "eval_steps_per_second": 5.695,
      "step": 250000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.202369516490554e-05,
      "loss": 3.0762,
      "step": 250100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2020493115593986e-05,
      "loss": 3.0735,
      "step": 250200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.201729106628242e-05,
      "loss": 3.0931,
      "step": 250300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2014089016970866e-05,
      "loss": 3.0641,
      "step": 250400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2010886967659305e-05,
      "loss": 3.0638,
      "step": 250500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2007684918347745e-05,
      "loss": 3.0682,
      "step": 250600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2004482869036185e-05,
      "loss": 3.0927,
      "step": 250700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.2001280819724624e-05,
      "loss": 3.0508,
      "step": 250800
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.199807877041307e-05,
      "loss": 3.0785,
      "step": 250900
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1994876721101504e-05,
      "loss": 3.0455,
      "step": 251000
    },
    {
      "epoch": 0.16,
      "eval_loss": 2.769622564315796,
      "eval_runtime": 109.5586,
      "eval_samples_per_second": 91.275,
      "eval_steps_per_second": 5.705,
      "step": 251000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.199167467178995e-05,
      "loss": 3.0707,
      "step": 251100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1988472622478383e-05,
      "loss": 3.0766,
      "step": 251200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.198527057316683e-05,
      "loss": 3.0788,
      "step": 251300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.198206852385527e-05,
      "loss": 3.0781,
      "step": 251400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.197886647454371e-05,
      "loss": 3.0447,
      "step": 251500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1975664425232156e-05,
      "loss": 3.0561,
      "step": 251600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.197246237592059e-05,
      "loss": 3.0564,
      "step": 251700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1969260326609035e-05,
      "loss": 3.0622,
      "step": 251800
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.196605827729747e-05,
      "loss": 3.0607,
      "step": 251900
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1962856227985915e-05,
      "loss": 3.0889,
      "step": 252000
    },
    {
      "epoch": 0.16,
      "eval_loss": 2.7667477130889893,
      "eval_runtime": 109.5704,
      "eval_samples_per_second": 91.266,
      "eval_steps_per_second": 5.704,
      "step": 252000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1959654178674355e-05,
      "loss": 3.08,
      "step": 252100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1956452129362794e-05,
      "loss": 3.0626,
      "step": 252200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1953250080051234e-05,
      "loss": 3.0719,
      "step": 252300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1950048030739674e-05,
      "loss": 3.0745,
      "step": 252400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.194684598142812e-05,
      "loss": 3.0751,
      "step": 252500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.194364393211655e-05,
      "loss": 3.0792,
      "step": 252600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1940441882805e-05,
      "loss": 3.0892,
      "step": 252700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.193723983349343e-05,
      "loss": 3.1018,
      "step": 252800
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.193403778418188e-05,
      "loss": 3.0995,
      "step": 252900
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.193083573487032e-05,
      "loss": 3.1,
      "step": 253000
    },
    {
      "epoch": 0.16,
      "eval_loss": 2.769557476043701,
      "eval_runtime": 109.9726,
      "eval_samples_per_second": 90.932,
      "eval_steps_per_second": 5.683,
      "step": 253000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.192763368555876e-05,
      "loss": 3.0796,
      "step": 253100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1924431636247205e-05,
      "loss": 3.0815,
      "step": 253200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.192122958693564e-05,
      "loss": 3.0829,
      "step": 253300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1918027537624085e-05,
      "loss": 3.0697,
      "step": 253400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.191482548831252e-05,
      "loss": 3.0628,
      "step": 253500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1911623439000964e-05,
      "loss": 3.0788,
      "step": 253600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1908421389689404e-05,
      "loss": 3.0863,
      "step": 253700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1905219340377844e-05,
      "loss": 3.0654,
      "step": 253800
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1902017291066283e-05,
      "loss": 3.0764,
      "step": 253900
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.189881524175472e-05,
      "loss": 3.1014,
      "step": 254000
    },
    {
      "epoch": 0.16,
      "eval_loss": 2.7723028659820557,
      "eval_runtime": 110.0242,
      "eval_samples_per_second": 90.889,
      "eval_steps_per_second": 5.681,
      "step": 254000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.189561319244317e-05,
      "loss": 3.0555,
      "step": 254100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.18924111431316e-05,
      "loss": 3.083,
      "step": 254200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.188920909382005e-05,
      "loss": 3.0796,
      "step": 254300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.188600704450848e-05,
      "loss": 3.0621,
      "step": 254400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.188280499519693e-05,
      "loss": 3.0562,
      "step": 254500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.187960294588537e-05,
      "loss": 3.0734,
      "step": 254600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.187640089657381e-05,
      "loss": 3.0572,
      "step": 254700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1873198847262255e-05,
      "loss": 3.0382,
      "step": 254800
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.186999679795069e-05,
      "loss": 3.0589,
      "step": 254900
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1866794748639134e-05,
      "loss": 3.0767,
      "step": 255000
    },
    {
      "epoch": 0.16,
      "eval_loss": 2.767193555831909,
      "eval_runtime": 110.4162,
      "eval_samples_per_second": 90.566,
      "eval_steps_per_second": 5.66,
      "step": 255000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.186359269932757e-05,
      "loss": 3.044,
      "step": 255100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1860390650016014e-05,
      "loss": 3.0464,
      "step": 255200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1857188600704453e-05,
      "loss": 3.0841,
      "step": 255300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.185398655139289e-05,
      "loss": 3.0773,
      "step": 255400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.185078450208133e-05,
      "loss": 3.0721,
      "step": 255500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.184758245276977e-05,
      "loss": 3.0676,
      "step": 255600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.184438040345822e-05,
      "loss": 3.0618,
      "step": 255700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.184117835414665e-05,
      "loss": 3.0776,
      "step": 255800
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.18379763048351e-05,
      "loss": 3.0683,
      "step": 255900
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.183477425552353e-05,
      "loss": 3.0872,
      "step": 256000
    },
    {
      "epoch": 0.16,
      "eval_loss": 2.7693660259246826,
      "eval_runtime": 110.469,
      "eval_samples_per_second": 90.523,
      "eval_steps_per_second": 5.658,
      "step": 256000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.183157220621198e-05,
      "loss": 3.0649,
      "step": 256100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.182837015690042e-05,
      "loss": 3.0791,
      "step": 256200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.182516810758886e-05,
      "loss": 3.0902,
      "step": 256300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1821966058277304e-05,
      "loss": 3.0928,
      "step": 256400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.181876400896574e-05,
      "loss": 3.0748,
      "step": 256500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1815561959654184e-05,
      "loss": 3.0787,
      "step": 256600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.1812359910342617e-05,
      "loss": 3.0744,
      "step": 256700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.180915786103106e-05,
      "loss": 3.0532,
      "step": 256800
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.18059558117195e-05,
      "loss": 3.1122,
      "step": 256900
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.180275376240794e-05,
      "loss": 3.0811,
      "step": 257000
    },
    {
      "epoch": 0.16,
      "eval_loss": 2.771117687225342,
      "eval_runtime": 109.7531,
      "eval_samples_per_second": 91.114,
      "eval_steps_per_second": 5.695,
      "step": 257000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.179955171309638e-05,
      "loss": 3.0736,
      "step": 257100
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.179634966378482e-05,
      "loss": 3.0669,
      "step": 257200
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.179314761447327e-05,
      "loss": 3.0572,
      "step": 257300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.17899455651617e-05,
      "loss": 3.0706,
      "step": 257400
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.178674351585015e-05,
      "loss": 3.0656,
      "step": 257500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.178354146653859e-05,
      "loss": 3.0563,
      "step": 257600
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.178033941722703e-05,
      "loss": 3.0649,
      "step": 257700
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.177713736791547e-05,
      "loss": 3.0766,
      "step": 257800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.177393531860391e-05,
      "loss": 3.0692,
      "step": 257900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1770733269292353e-05,
      "loss": 3.0725,
      "step": 258000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.7685327529907227,
      "eval_runtime": 109.467,
      "eval_samples_per_second": 91.352,
      "eval_steps_per_second": 5.709,
      "step": 258000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1767531219980786e-05,
      "loss": 3.0913,
      "step": 258100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.176432917066923e-05,
      "loss": 3.1022,
      "step": 258200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1761127121357666e-05,
      "loss": 3.0876,
      "step": 258300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.175792507204611e-05,
      "loss": 3.0965,
      "step": 258400
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.175472302273455e-05,
      "loss": 3.085,
      "step": 258500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.175152097342299e-05,
      "loss": 3.0735,
      "step": 258600
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.174831892411143e-05,
      "loss": 3.0568,
      "step": 258700
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.174511687479987e-05,
      "loss": 3.0817,
      "step": 258800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.174191482548832e-05,
      "loss": 3.1024,
      "step": 258900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.173871277617675e-05,
      "loss": 3.0476,
      "step": 259000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.769652843475342,
      "eval_runtime": 109.5413,
      "eval_samples_per_second": 91.29,
      "eval_steps_per_second": 5.706,
      "step": 259000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.17355107268652e-05,
      "loss": 3.0648,
      "step": 259100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.173230867755364e-05,
      "loss": 3.0642,
      "step": 259200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.172910662824208e-05,
      "loss": 3.0692,
      "step": 259300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1725904578930517e-05,
      "loss": 3.08,
      "step": 259400
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1722702529618956e-05,
      "loss": 3.0807,
      "step": 259500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.17195004803074e-05,
      "loss": 3.067,
      "step": 259600
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1716298430995836e-05,
      "loss": 3.0757,
      "step": 259700
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.171309638168428e-05,
      "loss": 3.061,
      "step": 259800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.170989433237272e-05,
      "loss": 3.0435,
      "step": 259900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.170669228306116e-05,
      "loss": 3.0423,
      "step": 260000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.768566131591797,
      "eval_runtime": 109.5892,
      "eval_samples_per_second": 91.25,
      "eval_steps_per_second": 5.703,
      "step": 260000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.17034902337496e-05,
      "loss": 3.0714,
      "step": 260100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.170028818443804e-05,
      "loss": 3.0737,
      "step": 260200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.169708613512648e-05,
      "loss": 3.0776,
      "step": 260300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.169388408581492e-05,
      "loss": 3.0421,
      "step": 260400
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.169068203650337e-05,
      "loss": 3.0573,
      "step": 260500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.16874799871918e-05,
      "loss": 3.0759,
      "step": 260600
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.168427793788025e-05,
      "loss": 3.0614,
      "step": 260700
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1681075888568686e-05,
      "loss": 3.0953,
      "step": 260800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1677873839257126e-05,
      "loss": 3.0668,
      "step": 260900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1674671789945566e-05,
      "loss": 3.0786,
      "step": 261000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.7681474685668945,
      "eval_runtime": 109.6988,
      "eval_samples_per_second": 91.159,
      "eval_steps_per_second": 5.697,
      "step": 261000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1671469740634006e-05,
      "loss": 3.0518,
      "step": 261100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.166826769132245e-05,
      "loss": 3.0812,
      "step": 261200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1665065642010885e-05,
      "loss": 3.0702,
      "step": 261300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.166186359269933e-05,
      "loss": 3.0784,
      "step": 261400
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.165866154338777e-05,
      "loss": 3.0968,
      "step": 261500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.165545949407621e-05,
      "loss": 3.0839,
      "step": 261600
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.165225744476465e-05,
      "loss": 3.0873,
      "step": 261700
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.164905539545309e-05,
      "loss": 3.0787,
      "step": 261800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.164585334614153e-05,
      "loss": 3.0525,
      "step": 261900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.164265129682997e-05,
      "loss": 3.0767,
      "step": 262000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.763681650161743,
      "eval_runtime": 110.9285,
      "eval_samples_per_second": 90.148,
      "eval_steps_per_second": 5.634,
      "step": 262000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1639449247518417e-05,
      "loss": 3.0334,
      "step": 262100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1636247198206856e-05,
      "loss": 3.0689,
      "step": 262200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1633045148895296e-05,
      "loss": 3.0618,
      "step": 262300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1629843099583736e-05,
      "loss": 3.0454,
      "step": 262400
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1626641050272176e-05,
      "loss": 3.0572,
      "step": 262500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1623439000960615e-05,
      "loss": 3.0589,
      "step": 262600
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1620236951649055e-05,
      "loss": 3.0554,
      "step": 262700
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.16170349023375e-05,
      "loss": 3.062,
      "step": 262800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.161383285302594e-05,
      "loss": 3.0426,
      "step": 262900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.161063080371438e-05,
      "loss": 3.0607,
      "step": 263000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.7654623985290527,
      "eval_runtime": 111.6342,
      "eval_samples_per_second": 89.578,
      "eval_steps_per_second": 5.599,
      "step": 263000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.160742875440282e-05,
      "loss": 3.0513,
      "step": 263100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.160422670509126e-05,
      "loss": 3.0728,
      "step": 263200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.16010246557797e-05,
      "loss": 3.0463,
      "step": 263300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.159782260646814e-05,
      "loss": 3.0627,
      "step": 263400
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.159462055715658e-05,
      "loss": 3.0688,
      "step": 263500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.159141850784502e-05,
      "loss": 3.0846,
      "step": 263600
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1588216458533466e-05,
      "loss": 3.0654,
      "step": 263700
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1585014409221906e-05,
      "loss": 3.068,
      "step": 263800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1581812359910345e-05,
      "loss": 3.0479,
      "step": 263900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1578610310598785e-05,
      "loss": 3.0779,
      "step": 264000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.7627549171447754,
      "eval_runtime": 110.1018,
      "eval_samples_per_second": 90.825,
      "eval_steps_per_second": 5.677,
      "step": 264000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1575408261287225e-05,
      "loss": 3.0547,
      "step": 264100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1572206211975665e-05,
      "loss": 3.0726,
      "step": 264200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1569004162664104e-05,
      "loss": 3.0651,
      "step": 264300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.156580211335255e-05,
      "loss": 3.0886,
      "step": 264400
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.156260006404099e-05,
      "loss": 3.0631,
      "step": 264500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.155939801472943e-05,
      "loss": 3.069,
      "step": 264600
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.155619596541787e-05,
      "loss": 3.0614,
      "step": 264700
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.155299391610631e-05,
      "loss": 3.071,
      "step": 264800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.154979186679475e-05,
      "loss": 3.0675,
      "step": 264900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.154658981748319e-05,
      "loss": 3.0467,
      "step": 265000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.7632060050964355,
      "eval_runtime": 110.0408,
      "eval_samples_per_second": 90.875,
      "eval_steps_per_second": 5.68,
      "step": 265000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.154338776817163e-05,
      "loss": 3.0574,
      "step": 265100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1540185718860076e-05,
      "loss": 3.0554,
      "step": 265200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1536983669548515e-05,
      "loss": 3.0991,
      "step": 265300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1533781620236955e-05,
      "loss": 3.0677,
      "step": 265400
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1530579570925395e-05,
      "loss": 3.04,
      "step": 265500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1527377521613835e-05,
      "loss": 3.0649,
      "step": 265600
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1524175472302274e-05,
      "loss": 3.055,
      "step": 265700
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1520973422990714e-05,
      "loss": 3.0793,
      "step": 265800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1517771373679154e-05,
      "loss": 3.0471,
      "step": 265900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.15145693243676e-05,
      "loss": 3.0583,
      "step": 266000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.7660090923309326,
      "eval_runtime": 109.8925,
      "eval_samples_per_second": 90.998,
      "eval_steps_per_second": 5.687,
      "step": 266000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.151136727505604e-05,
      "loss": 3.0695,
      "step": 266100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.150816522574448e-05,
      "loss": 3.0626,
      "step": 266200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.150496317643292e-05,
      "loss": 3.0335,
      "step": 266300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.150176112712136e-05,
      "loss": 3.0771,
      "step": 266400
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.14985590778098e-05,
      "loss": 3.0444,
      "step": 266500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.149535702849824e-05,
      "loss": 3.0614,
      "step": 266600
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.149215497918668e-05,
      "loss": 3.0576,
      "step": 266700
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1488952929875125e-05,
      "loss": 3.0585,
      "step": 266800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1485750880563565e-05,
      "loss": 3.0735,
      "step": 266900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1482548831252004e-05,
      "loss": 3.0464,
      "step": 267000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.762979507446289,
      "eval_runtime": 110.0863,
      "eval_samples_per_second": 90.838,
      "eval_steps_per_second": 5.677,
      "step": 267000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1479346781940444e-05,
      "loss": 3.0513,
      "step": 267100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1476144732628884e-05,
      "loss": 3.0429,
      "step": 267200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1472942683317324e-05,
      "loss": 3.0865,
      "step": 267300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1469740634005763e-05,
      "loss": 3.0792,
      "step": 267400
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.146653858469421e-05,
      "loss": 3.0364,
      "step": 267500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.146333653538265e-05,
      "loss": 3.0615,
      "step": 267600
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.146013448607109e-05,
      "loss": 3.0796,
      "step": 267700
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.145693243675953e-05,
      "loss": 3.0822,
      "step": 267800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.145373038744797e-05,
      "loss": 3.0893,
      "step": 267900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.145052833813641e-05,
      "loss": 3.0661,
      "step": 268000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.7613704204559326,
      "eval_runtime": 110.1943,
      "eval_samples_per_second": 90.749,
      "eval_steps_per_second": 5.672,
      "step": 268000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.144732628882485e-05,
      "loss": 3.0516,
      "step": 268100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.144412423951329e-05,
      "loss": 3.0732,
      "step": 268200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.144092219020173e-05,
      "loss": 3.0739,
      "step": 268300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1437720140890174e-05,
      "loss": 3.0675,
      "step": 268400
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1434518091578614e-05,
      "loss": 3.0685,
      "step": 268500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1431316042267054e-05,
      "loss": 3.0668,
      "step": 268600
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1428113992955494e-05,
      "loss": 3.046,
      "step": 268700
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.142491194364393e-05,
      "loss": 3.0446,
      "step": 268800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.142170989433237e-05,
      "loss": 3.0485,
      "step": 268900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.141850784502081e-05,
      "loss": 3.0875,
      "step": 269000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.762831449508667,
      "eval_runtime": 122.0797,
      "eval_samples_per_second": 81.914,
      "eval_steps_per_second": 5.12,
      "step": 269000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.141530579570926e-05,
      "loss": 3.0894,
      "step": 269100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.14121037463977e-05,
      "loss": 3.0424,
      "step": 269200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.140890169708614e-05,
      "loss": 3.0715,
      "step": 269300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.140569964777458e-05,
      "loss": 3.0603,
      "step": 269400
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.140249759846302e-05,
      "loss": 3.0777,
      "step": 269500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.139929554915146e-05,
      "loss": 3.0763,
      "step": 269600
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.13960934998399e-05,
      "loss": 3.0542,
      "step": 269700
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1392891450528344e-05,
      "loss": 3.0579,
      "step": 269800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.138968940121678e-05,
      "loss": 3.0715,
      "step": 269900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1386487351905224e-05,
      "loss": 3.0787,
      "step": 270000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.7610132694244385,
      "eval_runtime": 110.891,
      "eval_samples_per_second": 90.179,
      "eval_steps_per_second": 5.636,
      "step": 270000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1383285302593663e-05,
      "loss": 3.0653,
      "step": 270100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.13800832532821e-05,
      "loss": 3.055,
      "step": 270200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.137688120397054e-05,
      "loss": 3.0777,
      "step": 270300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.137367915465898e-05,
      "loss": 3.0685,
      "step": 270400
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.137047710534742e-05,
      "loss": 3.0464,
      "step": 270500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.136727505603586e-05,
      "loss": 3.0634,
      "step": 270600
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.136407300672431e-05,
      "loss": 3.064,
      "step": 270700
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.136087095741275e-05,
      "loss": 3.0699,
      "step": 270800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.135766890810119e-05,
      "loss": 3.0813,
      "step": 270900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.135446685878963e-05,
      "loss": 3.074,
      "step": 271000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.761463165283203,
      "eval_runtime": 109.9733,
      "eval_samples_per_second": 90.931,
      "eval_steps_per_second": 5.683,
      "step": 271000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.135126480947807e-05,
      "loss": 3.066,
      "step": 271100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.134806276016651e-05,
      "loss": 3.0706,
      "step": 271200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.134486071085495e-05,
      "loss": 3.0534,
      "step": 271300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1341658661543394e-05,
      "loss": 3.0489,
      "step": 271400
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1338456612231827e-05,
      "loss": 3.089,
      "step": 271500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.133525456292027e-05,
      "loss": 3.081,
      "step": 271600
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.133205251360871e-05,
      "loss": 3.0627,
      "step": 271700
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.132885046429715e-05,
      "loss": 3.0534,
      "step": 271800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.132564841498559e-05,
      "loss": 3.0585,
      "step": 271900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.132244636567403e-05,
      "loss": 3.0533,
      "step": 272000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.7618422508239746,
      "eval_runtime": 122.4044,
      "eval_samples_per_second": 81.696,
      "eval_steps_per_second": 5.106,
      "step": 272000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.131924431636248e-05,
      "loss": 3.0567,
      "step": 272100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.131604226705091e-05,
      "loss": 3.0296,
      "step": 272200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.131284021773936e-05,
      "loss": 3.0578,
      "step": 272300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.13096381684278e-05,
      "loss": 3.0598,
      "step": 272400
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.130643611911624e-05,
      "loss": 3.056,
      "step": 272500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.130323406980468e-05,
      "loss": 3.065,
      "step": 272600
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.130003202049312e-05,
      "loss": 3.0609,
      "step": 272700
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1296829971181564e-05,
      "loss": 3.0481,
      "step": 272800
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1293627921869996e-05,
      "loss": 3.0308,
      "step": 272900
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.129042587255844e-05,
      "loss": 3.0544,
      "step": 273000
    },
    {
      "epoch": 0.17,
      "eval_loss": 2.7620179653167725,
      "eval_runtime": 109.4974,
      "eval_samples_per_second": 91.326,
      "eval_steps_per_second": 5.708,
      "step": 273000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.1287223823246876e-05,
      "loss": 3.0664,
      "step": 273100
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.128402177393532e-05,
      "loss": 3.0636,
      "step": 273200
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.128081972462376e-05,
      "loss": 3.0692,
      "step": 273300
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.12776176753122e-05,
      "loss": 3.0522,
      "step": 273400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.127441562600064e-05,
      "loss": 3.0328,
      "step": 273500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.127121357668908e-05,
      "loss": 3.0584,
      "step": 273600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.126801152737753e-05,
      "loss": 3.0728,
      "step": 273700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.126480947806596e-05,
      "loss": 3.0574,
      "step": 273800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.126160742875441e-05,
      "loss": 3.0616,
      "step": 273900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.125840537944285e-05,
      "loss": 3.0552,
      "step": 274000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.7582380771636963,
      "eval_runtime": 109.9177,
      "eval_samples_per_second": 90.977,
      "eval_steps_per_second": 5.686,
      "step": 274000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.125520333013129e-05,
      "loss": 3.0734,
      "step": 274100
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.125200128081973e-05,
      "loss": 3.0449,
      "step": 274200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1248799231508166e-05,
      "loss": 3.0407,
      "step": 274300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.124559718219661e-05,
      "loss": 3.0733,
      "step": 274400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1242395132885046e-05,
      "loss": 3.0603,
      "step": 274500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.123919308357349e-05,
      "loss": 3.0548,
      "step": 274600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1235991034261925e-05,
      "loss": 3.0851,
      "step": 274700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.123278898495037e-05,
      "loss": 3.0694,
      "step": 274800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.122958693563881e-05,
      "loss": 3.0582,
      "step": 274900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.122638488632725e-05,
      "loss": 3.0449,
      "step": 275000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.7580668926239014,
      "eval_runtime": 121.7085,
      "eval_samples_per_second": 82.164,
      "eval_steps_per_second": 5.135,
      "step": 275000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.12231828370157e-05,
      "loss": 3.0764,
      "step": 275100
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.121998078770413e-05,
      "loss": 3.0497,
      "step": 275200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.121677873839258e-05,
      "loss": 3.0664,
      "step": 275300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.121357668908101e-05,
      "loss": 3.0497,
      "step": 275400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.121037463976946e-05,
      "loss": 3.061,
      "step": 275500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.120717259045789e-05,
      "loss": 3.0638,
      "step": 275600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1203970541146336e-05,
      "loss": 3.0705,
      "step": 275700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1200768491834776e-05,
      "loss": 3.0564,
      "step": 275800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1197566442523216e-05,
      "loss": 3.0476,
      "step": 275900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.119436439321166e-05,
      "loss": 3.0435,
      "step": 276000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.760873556137085,
      "eval_runtime": 109.8271,
      "eval_samples_per_second": 91.052,
      "eval_steps_per_second": 5.691,
      "step": 276000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1191162343900095e-05,
      "loss": 3.0673,
      "step": 276100
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.118796029458854e-05,
      "loss": 3.0483,
      "step": 276200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1184758245276975e-05,
      "loss": 3.0507,
      "step": 276300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.118155619596542e-05,
      "loss": 3.0684,
      "step": 276400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.117835414665386e-05,
      "loss": 3.0262,
      "step": 276500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.11751520973423e-05,
      "loss": 3.0691,
      "step": 276600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.117195004803075e-05,
      "loss": 3.0591,
      "step": 276700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.116874799871918e-05,
      "loss": 3.0497,
      "step": 276800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.116554594940763e-05,
      "loss": 3.053,
      "step": 276900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.116234390009606e-05,
      "loss": 3.0545,
      "step": 277000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.757499933242798,
      "eval_runtime": 109.9135,
      "eval_samples_per_second": 90.981,
      "eval_steps_per_second": 5.686,
      "step": 277000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1159141850784506e-05,
      "loss": 3.0532,
      "step": 277100
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.115593980147294e-05,
      "loss": 3.0752,
      "step": 277200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1152737752161386e-05,
      "loss": 3.062,
      "step": 277300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1149535702849825e-05,
      "loss": 3.0634,
      "step": 277400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1146333653538265e-05,
      "loss": 3.0671,
      "step": 277500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.114313160422671e-05,
      "loss": 3.0842,
      "step": 277600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1139929554915145e-05,
      "loss": 3.0774,
      "step": 277700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.113672750560359e-05,
      "loss": 3.0527,
      "step": 277800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1133525456292024e-05,
      "loss": 3.0815,
      "step": 277900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.113032340698047e-05,
      "loss": 3.0535,
      "step": 278000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.758791208267212,
      "eval_runtime": 110.1888,
      "eval_samples_per_second": 90.753,
      "eval_steps_per_second": 5.672,
      "step": 278000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.112712135766891e-05,
      "loss": 3.0529,
      "step": 278100
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.112391930835735e-05,
      "loss": 3.0283,
      "step": 278200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1120717259045797e-05,
      "loss": 3.0413,
      "step": 278300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.111751520973423e-05,
      "loss": 3.0462,
      "step": 278400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1114313160422676e-05,
      "loss": 3.0548,
      "step": 278500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.111111111111111e-05,
      "loss": 3.0899,
      "step": 278600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1107909061799556e-05,
      "loss": 3.081,
      "step": 278700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.110470701248799e-05,
      "loss": 3.0469,
      "step": 278800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1101504963176435e-05,
      "loss": 3.0425,
      "step": 278900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1098302913864875e-05,
      "loss": 3.0581,
      "step": 279000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.7582361698150635,
      "eval_runtime": 110.5179,
      "eval_samples_per_second": 90.483,
      "eval_steps_per_second": 5.655,
      "step": 279000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1095100864553314e-05,
      "loss": 3.0246,
      "step": 279100
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.109189881524176e-05,
      "loss": 3.0726,
      "step": 279200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1088696765930194e-05,
      "loss": 3.0561,
      "step": 279300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.108549471661864e-05,
      "loss": 3.0787,
      "step": 279400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1082292667307073e-05,
      "loss": 3.0674,
      "step": 279500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.107909061799552e-05,
      "loss": 3.0729,
      "step": 279600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.107588856868396e-05,
      "loss": 3.0645,
      "step": 279700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.10726865193724e-05,
      "loss": 3.0614,
      "step": 279800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1069484470060846e-05,
      "loss": 3.0831,
      "step": 279900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.106628242074928e-05,
      "loss": 3.0681,
      "step": 280000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.7586588859558105,
      "eval_runtime": 122.59,
      "eval_samples_per_second": 81.573,
      "eval_steps_per_second": 5.098,
      "step": 280000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1063080371437725e-05,
      "loss": 3.0688,
      "step": 280100
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.105987832212616e-05,
      "loss": 3.046,
      "step": 280200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1056676272814605e-05,
      "loss": 3.0563,
      "step": 280300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.105347422350304e-05,
      "loss": 3.0691,
      "step": 280400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1050272174191484e-05,
      "loss": 3.0777,
      "step": 280500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1047070124879924e-05,
      "loss": 3.072,
      "step": 280600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1043868075568364e-05,
      "loss": 3.0497,
      "step": 280700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.104066602625681e-05,
      "loss": 3.0513,
      "step": 280800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.103746397694524e-05,
      "loss": 3.0403,
      "step": 280900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.103426192763369e-05,
      "loss": 3.0731,
      "step": 281000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.7550809383392334,
      "eval_runtime": 110.1981,
      "eval_samples_per_second": 90.746,
      "eval_steps_per_second": 5.672,
      "step": 281000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.103105987832212e-05,
      "loss": 3.064,
      "step": 281100
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.102785782901057e-05,
      "loss": 3.0703,
      "step": 281200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.102465577969901e-05,
      "loss": 3.0517,
      "step": 281300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.102145373038745e-05,
      "loss": 3.0517,
      "step": 281400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1018251681075895e-05,
      "loss": 3.0512,
      "step": 281500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.101504963176433e-05,
      "loss": 3.0691,
      "step": 281600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1011847582452775e-05,
      "loss": 3.0418,
      "step": 281700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.100864553314121e-05,
      "loss": 3.0742,
      "step": 281800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1005443483829654e-05,
      "loss": 3.0478,
      "step": 281900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.1002241434518094e-05,
      "loss": 3.0713,
      "step": 282000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.7563815116882324,
      "eval_runtime": 109.8213,
      "eval_samples_per_second": 91.057,
      "eval_steps_per_second": 5.691,
      "step": 282000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.0999039385206534e-05,
      "loss": 3.0635,
      "step": 282100
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.0995837335894973e-05,
      "loss": 3.0613,
      "step": 282200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.099263528658341e-05,
      "loss": 3.061,
      "step": 282300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.098943323727186e-05,
      "loss": 3.0429,
      "step": 282400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.098623118796029e-05,
      "loss": 3.0643,
      "step": 282500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.098302913864874e-05,
      "loss": 3.0447,
      "step": 282600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.097982708933718e-05,
      "loss": 3.0609,
      "step": 282700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.097662504002562e-05,
      "loss": 3.0482,
      "step": 282800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.097342299071406e-05,
      "loss": 3.0754,
      "step": 282900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.09702209414025e-05,
      "loss": 3.0639,
      "step": 283000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.756375312805176,
      "eval_runtime": 122.3084,
      "eval_samples_per_second": 81.761,
      "eval_steps_per_second": 5.11,
      "step": 283000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.0967018892090945e-05,
      "loss": 3.0475,
      "step": 283100
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.096381684277938e-05,
      "loss": 3.056,
      "step": 283200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.0960614793467824e-05,
      "loss": 3.0474,
      "step": 283300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.095741274415626e-05,
      "loss": 3.0383,
      "step": 283400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.0954210694844704e-05,
      "loss": 3.0453,
      "step": 283500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.095100864553314e-05,
      "loss": 3.0549,
      "step": 283600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.094780659622158e-05,
      "loss": 3.0724,
      "step": 283700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.094460454691002e-05,
      "loss": 3.0607,
      "step": 283800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.094140249759846e-05,
      "loss": 3.0671,
      "step": 283900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.093820044828691e-05,
      "loss": 3.0317,
      "step": 284000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.759157419204712,
      "eval_runtime": 109.8312,
      "eval_samples_per_second": 91.049,
      "eval_steps_per_second": 5.691,
      "step": 284000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.093499839897534e-05,
      "loss": 3.0452,
      "step": 284100
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.093179634966379e-05,
      "loss": 3.0628,
      "step": 284200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.092859430035223e-05,
      "loss": 3.083,
      "step": 284300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.092539225104067e-05,
      "loss": 3.0478,
      "step": 284400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.092219020172911e-05,
      "loss": 3.042,
      "step": 284500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.091898815241755e-05,
      "loss": 3.0639,
      "step": 284600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.0915786103105994e-05,
      "loss": 3.0808,
      "step": 284700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.091258405379443e-05,
      "loss": 3.0293,
      "step": 284800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.0909382004482874e-05,
      "loss": 3.0941,
      "step": 284900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.090617995517131e-05,
      "loss": 3.0524,
      "step": 285000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.7574338912963867,
      "eval_runtime": 109.7779,
      "eval_samples_per_second": 91.093,
      "eval_steps_per_second": 5.693,
      "step": 285000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.090297790585975e-05,
      "loss": 3.0829,
      "step": 285100
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.089977585654819e-05,
      "loss": 3.0582,
      "step": 285200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.089657380723663e-05,
      "loss": 3.0262,
      "step": 285300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.089337175792507e-05,
      "loss": 3.0552,
      "step": 285400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.089016970861351e-05,
      "loss": 3.0378,
      "step": 285500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.088696765930196e-05,
      "loss": 3.0624,
      "step": 285600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.088376560999039e-05,
      "loss": 3.0707,
      "step": 285700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.088056356067884e-05,
      "loss": 3.0693,
      "step": 285800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.087736151136728e-05,
      "loss": 3.0739,
      "step": 285900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.087415946205572e-05,
      "loss": 3.0483,
      "step": 286000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.756641387939453,
      "eval_runtime": 109.9213,
      "eval_samples_per_second": 90.974,
      "eval_steps_per_second": 5.686,
      "step": 286000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.087095741274416e-05,
      "loss": 3.0538,
      "step": 286100
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.08677553634326e-05,
      "loss": 3.0713,
      "step": 286200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.0864553314121043e-05,
      "loss": 3.0814,
      "step": 286300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.0861351264809476e-05,
      "loss": 3.0558,
      "step": 286400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.085814921549792e-05,
      "loss": 3.0523,
      "step": 286500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.085494716618636e-05,
      "loss": 3.0815,
      "step": 286600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.08517451168748e-05,
      "loss": 3.0635,
      "step": 286700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.084854306756324e-05,
      "loss": 3.0908,
      "step": 286800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.084534101825168e-05,
      "loss": 3.0898,
      "step": 286900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.084213896894012e-05,
      "loss": 3.048,
      "step": 287000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.7541558742523193,
      "eval_runtime": 110.1506,
      "eval_samples_per_second": 90.785,
      "eval_steps_per_second": 5.674,
      "step": 287000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.083893691962856e-05,
      "loss": 3.0432,
      "step": 287100
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.083573487031701e-05,
      "loss": 3.0596,
      "step": 287200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.083253282100545e-05,
      "loss": 3.0728,
      "step": 287300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.082933077169389e-05,
      "loss": 3.0853,
      "step": 287400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.082612872238233e-05,
      "loss": 3.0634,
      "step": 287500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.082292667307077e-05,
      "loss": 3.0568,
      "step": 287600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.0819724623759207e-05,
      "loss": 3.0467,
      "step": 287700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.0816522574447646e-05,
      "loss": 3.0555,
      "step": 287800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.081332052513609e-05,
      "loss": 3.0715,
      "step": 287900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.0810118475824526e-05,
      "loss": 3.0627,
      "step": 288000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.7545204162597656,
      "eval_runtime": 122.6731,
      "eval_samples_per_second": 81.517,
      "eval_steps_per_second": 5.095,
      "step": 288000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.080691642651297e-05,
      "loss": 3.0457,
      "step": 288100
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.080371437720141e-05,
      "loss": 3.0335,
      "step": 288200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.080051232788985e-05,
      "loss": 3.0661,
      "step": 288300
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.079731027857829e-05,
      "loss": 3.055,
      "step": 288400
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.079410822926673e-05,
      "loss": 3.0553,
      "step": 288500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.079090617995517e-05,
      "loss": 3.0688,
      "step": 288600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.078770413064361e-05,
      "loss": 3.0463,
      "step": 288700
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.078450208133206e-05,
      "loss": 3.058,
      "step": 288800
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.07813000320205e-05,
      "loss": 3.053,
      "step": 288900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.077809798270894e-05,
      "loss": 3.0666,
      "step": 289000
    },
    {
      "epoch": 0.18,
      "eval_loss": 2.756013870239258,
      "eval_runtime": 110.206,
      "eval_samples_per_second": 90.739,
      "eval_steps_per_second": 5.671,
      "step": 289000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0774895933397376e-05,
      "loss": 3.0577,
      "step": 289100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0771693884085816e-05,
      "loss": 3.0464,
      "step": 289200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0768491834774256e-05,
      "loss": 3.0563,
      "step": 289300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0765289785462696e-05,
      "loss": 3.0569,
      "step": 289400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.076208773615114e-05,
      "loss": 3.047,
      "step": 289500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.075888568683958e-05,
      "loss": 3.0466,
      "step": 289600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.075568363752802e-05,
      "loss": 3.0462,
      "step": 289700
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.075248158821646e-05,
      "loss": 3.048,
      "step": 289800
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.07492795389049e-05,
      "loss": 3.0393,
      "step": 289900
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.074607748959334e-05,
      "loss": 3.073,
      "step": 290000
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.7545957565307617,
      "eval_runtime": 109.7483,
      "eval_samples_per_second": 91.118,
      "eval_steps_per_second": 5.695,
      "step": 290000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.074287544028178e-05,
      "loss": 3.0977,
      "step": 290100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.073967339097022e-05,
      "loss": 3.0551,
      "step": 290200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.073647134165866e-05,
      "loss": 3.0697,
      "step": 290300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0733269292347107e-05,
      "loss": 3.0386,
      "step": 290400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0730067243035546e-05,
      "loss": 3.0284,
      "step": 290500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0726865193723986e-05,
      "loss": 3.0633,
      "step": 290600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0723663144412426e-05,
      "loss": 3.0173,
      "step": 290700
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0720461095100866e-05,
      "loss": 3.0529,
      "step": 290800
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0717259045789305e-05,
      "loss": 3.0376,
      "step": 290900
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0714056996477745e-05,
      "loss": 3.048,
      "step": 291000
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.7558786869049072,
      "eval_runtime": 109.7319,
      "eval_samples_per_second": 91.131,
      "eval_steps_per_second": 5.696,
      "step": 291000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.071085494716619e-05,
      "loss": 3.0413,
      "step": 291100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.070765289785463e-05,
      "loss": 3.0518,
      "step": 291200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.070445084854307e-05,
      "loss": 3.0309,
      "step": 291300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.070124879923151e-05,
      "loss": 3.0301,
      "step": 291400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.069804674991995e-05,
      "loss": 3.0113,
      "step": 291500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.069484470060839e-05,
      "loss": 3.0375,
      "step": 291600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.069164265129683e-05,
      "loss": 3.038,
      "step": 291700
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.068844060198527e-05,
      "loss": 3.0741,
      "step": 291800
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0685238552673716e-05,
      "loss": 3.0503,
      "step": 291900
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0682036503362156e-05,
      "loss": 3.0693,
      "step": 292000
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.7553672790527344,
      "eval_runtime": 109.5887,
      "eval_samples_per_second": 91.25,
      "eval_steps_per_second": 5.703,
      "step": 292000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0678834454050596e-05,
      "loss": 3.0595,
      "step": 292100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0675632404739035e-05,
      "loss": 3.0428,
      "step": 292200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0672430355427475e-05,
      "loss": 3.057,
      "step": 292300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0669228306115915e-05,
      "loss": 3.0453,
      "step": 292400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0666026256804355e-05,
      "loss": 3.0227,
      "step": 292500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.06628242074928e-05,
      "loss": 3.039,
      "step": 292600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.065962215818124e-05,
      "loss": 3.0589,
      "step": 292700
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.065642010886968e-05,
      "loss": 3.0318,
      "step": 292800
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.065321805955812e-05,
      "loss": 3.0427,
      "step": 292900
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.065001601024656e-05,
      "loss": 3.049,
      "step": 293000
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.753833055496216,
      "eval_runtime": 109.793,
      "eval_samples_per_second": 91.08,
      "eval_steps_per_second": 5.693,
      "step": 293000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0646813960935e-05,
      "loss": 3.0689,
      "step": 293100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.064361191162344e-05,
      "loss": 3.0389,
      "step": 293200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.064040986231188e-05,
      "loss": 3.0376,
      "step": 293300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.063720781300032e-05,
      "loss": 3.0413,
      "step": 293400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0634005763688766e-05,
      "loss": 3.0376,
      "step": 293500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0630803714377205e-05,
      "loss": 3.029,
      "step": 293600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0627601665065645e-05,
      "loss": 3.0508,
      "step": 293700
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0624399615754085e-05,
      "loss": 3.0251,
      "step": 293800
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0621197566442525e-05,
      "loss": 3.035,
      "step": 293900
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0617995517130964e-05,
      "loss": 3.0253,
      "step": 294000
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.755887031555176,
      "eval_runtime": 110.0411,
      "eval_samples_per_second": 90.875,
      "eval_steps_per_second": 5.68,
      "step": 294000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0614793467819404e-05,
      "loss": 3.0341,
      "step": 294100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.061159141850785e-05,
      "loss": 3.0645,
      "step": 294200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0608389369196284e-05,
      "loss": 3.0438,
      "step": 294300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.060518731988473e-05,
      "loss": 3.0005,
      "step": 294400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.060198527057317e-05,
      "loss": 3.0445,
      "step": 294500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.059878322126161e-05,
      "loss": 3.0432,
      "step": 294600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.059558117195005e-05,
      "loss": 3.0536,
      "step": 294700
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.059237912263849e-05,
      "loss": 3.0445,
      "step": 294800
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0589177073326935e-05,
      "loss": 3.0358,
      "step": 294900
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.058597502401537e-05,
      "loss": 3.0265,
      "step": 295000
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.7549071311950684,
      "eval_runtime": 124.2271,
      "eval_samples_per_second": 80.498,
      "eval_steps_per_second": 5.031,
      "step": 295000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0582772974703815e-05,
      "loss": 3.0207,
      "step": 295100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0579570925392255e-05,
      "loss": 3.0434,
      "step": 295200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0576368876080694e-05,
      "loss": 3.0332,
      "step": 295300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0573166826769134e-05,
      "loss": 3.0603,
      "step": 295400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0569964777457574e-05,
      "loss": 3.0567,
      "step": 295500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0566762728146014e-05,
      "loss": 3.0058,
      "step": 295600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0563560678834453e-05,
      "loss": 3.0274,
      "step": 295700
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.05603586295229e-05,
      "loss": 3.037,
      "step": 295800
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.055715658021133e-05,
      "loss": 3.0271,
      "step": 295900
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.055395453089978e-05,
      "loss": 3.0154,
      "step": 296000
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.7562572956085205,
      "eval_runtime": 110.0615,
      "eval_samples_per_second": 90.858,
      "eval_steps_per_second": 5.679,
      "step": 296000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.055075248158822e-05,
      "loss": 3.0107,
      "step": 296100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.054755043227666e-05,
      "loss": 3.058,
      "step": 296200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.05443483829651e-05,
      "loss": 3.045,
      "step": 296300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.054114633365354e-05,
      "loss": 3.0169,
      "step": 296400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0537944284341985e-05,
      "loss": 3.0106,
      "step": 296500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.053474223503042e-05,
      "loss": 3.0282,
      "step": 296600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0531540185718864e-05,
      "loss": 2.9942,
      "step": 296700
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0528338136407304e-05,
      "loss": 3.0503,
      "step": 296800
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0525136087095744e-05,
      "loss": 3.036,
      "step": 296900
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0521934037784184e-05,
      "loss": 3.0267,
      "step": 297000
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.75506329536438,
      "eval_runtime": 109.9678,
      "eval_samples_per_second": 90.936,
      "eval_steps_per_second": 5.683,
      "step": 297000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.051873198847262e-05,
      "loss": 3.042,
      "step": 297100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.051552993916107e-05,
      "loss": 3.0391,
      "step": 297200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.05123278898495e-05,
      "loss": 3.0349,
      "step": 297300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.050912584053795e-05,
      "loss": 3.0407,
      "step": 297400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.050592379122638e-05,
      "loss": 2.9982,
      "step": 297500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.050272174191483e-05,
      "loss": 3.037,
      "step": 297600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.049951969260327e-05,
      "loss": 3.0584,
      "step": 297700
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.049631764329171e-05,
      "loss": 3.0611,
      "step": 297800
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.049311559398015e-05,
      "loss": 3.0557,
      "step": 297900
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.048991354466859e-05,
      "loss": 3.0425,
      "step": 298000
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.7522401809692383,
      "eval_runtime": 110.1324,
      "eval_samples_per_second": 90.8,
      "eval_steps_per_second": 5.675,
      "step": 298000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0486711495357034e-05,
      "loss": 3.0156,
      "step": 298100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.048350944604547e-05,
      "loss": 3.0858,
      "step": 298200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0480307396733914e-05,
      "loss": 3.0667,
      "step": 298300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0477105347422353e-05,
      "loss": 3.0318,
      "step": 298400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.047390329811079e-05,
      "loss": 3.0468,
      "step": 298500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.047070124879923e-05,
      "loss": 3.0385,
      "step": 298600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.046749919948767e-05,
      "loss": 3.0386,
      "step": 298700
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.046429715017612e-05,
      "loss": 3.0631,
      "step": 298800
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.046109510086455e-05,
      "loss": 3.0648,
      "step": 298900
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0457893051553e-05,
      "loss": 3.0412,
      "step": 299000
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.7513413429260254,
      "eval_runtime": 110.0426,
      "eval_samples_per_second": 90.874,
      "eval_steps_per_second": 5.68,
      "step": 299000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.045469100224143e-05,
      "loss": 3.0551,
      "step": 299100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.045148895292988e-05,
      "loss": 3.0441,
      "step": 299200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.044828690361832e-05,
      "loss": 3.0394,
      "step": 299300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.044508485430676e-05,
      "loss": 3.0701,
      "step": 299400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0441882804995204e-05,
      "loss": 3.0729,
      "step": 299500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.043868075568364e-05,
      "loss": 3.0246,
      "step": 299600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0435478706372084e-05,
      "loss": 3.0595,
      "step": 299700
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0432276657060517e-05,
      "loss": 3.042,
      "step": 299800
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.042907460774896e-05,
      "loss": 3.0317,
      "step": 299900
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.04258725584374e-05,
      "loss": 3.0583,
      "step": 300000
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.7533600330352783,
      "eval_runtime": 122.968,
      "eval_samples_per_second": 81.322,
      "eval_steps_per_second": 5.083,
      "step": 300000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.042267050912584e-05,
      "loss": 3.0591,
      "step": 300100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.041946845981428e-05,
      "loss": 3.067,
      "step": 300200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.041626641050272e-05,
      "loss": 3.066,
      "step": 300300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.041306436119117e-05,
      "loss": 3.0392,
      "step": 300400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.04098623118796e-05,
      "loss": 3.0644,
      "step": 300500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.040666026256805e-05,
      "loss": 3.0485,
      "step": 300600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.040345821325648e-05,
      "loss": 3.0348,
      "step": 300700
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.040025616394493e-05,
      "loss": 3.0611,
      "step": 300800
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.039705411463337e-05,
      "loss": 3.0732,
      "step": 300900
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.039385206532181e-05,
      "loss": 3.063,
      "step": 301000
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.7503695487976074,
      "eval_runtime": 109.9313,
      "eval_samples_per_second": 90.966,
      "eval_steps_per_second": 5.685,
      "step": 301000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0390650016010254e-05,
      "loss": 3.0649,
      "step": 301100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0387447966698686e-05,
      "loss": 3.0523,
      "step": 301200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.038424591738713e-05,
      "loss": 3.0669,
      "step": 301300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0381043868075566e-05,
      "loss": 3.0507,
      "step": 301400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.037784181876401e-05,
      "loss": 3.0659,
      "step": 301500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.037463976945245e-05,
      "loss": 3.0634,
      "step": 301600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.037143772014089e-05,
      "loss": 3.0231,
      "step": 301700
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.036823567082934e-05,
      "loss": 3.0675,
      "step": 301800
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.036503362151777e-05,
      "loss": 3.0569,
      "step": 301900
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.036183157220622e-05,
      "loss": 3.0502,
      "step": 302000
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.750575304031372,
      "eval_runtime": 109.4473,
      "eval_samples_per_second": 91.368,
      "eval_steps_per_second": 5.711,
      "step": 302000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.035862952289465e-05,
      "loss": 3.0331,
      "step": 302100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.03554274735831e-05,
      "loss": 3.0378,
      "step": 302200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.035222542427153e-05,
      "loss": 3.0516,
      "step": 302300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.034902337495998e-05,
      "loss": 3.0422,
      "step": 302400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.034582132564842e-05,
      "loss": 3.0468,
      "step": 302500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0342619276336856e-05,
      "loss": 3.0443,
      "step": 302600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.03394172270253e-05,
      "loss": 3.0779,
      "step": 302700
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0336215177713736e-05,
      "loss": 3.0268,
      "step": 302800
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.033301312840218e-05,
      "loss": 3.0659,
      "step": 302900
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0329811079090615e-05,
      "loss": 3.059,
      "step": 303000
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.7532153129577637,
      "eval_runtime": 109.8537,
      "eval_samples_per_second": 91.03,
      "eval_steps_per_second": 5.689,
      "step": 303000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.032660902977906e-05,
      "loss": 3.0668,
      "step": 303100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.03234069804675e-05,
      "loss": 3.0617,
      "step": 303200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.032020493115594e-05,
      "loss": 3.0506,
      "step": 303300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.031700288184439e-05,
      "loss": 3.0412,
      "step": 303400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.031380083253282e-05,
      "loss": 3.0529,
      "step": 303500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.031059878322127e-05,
      "loss": 3.054,
      "step": 303600
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.03073967339097e-05,
      "loss": 3.0508,
      "step": 303700
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.030419468459815e-05,
      "loss": 3.0313,
      "step": 303800
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.030099263528658e-05,
      "loss": 3.0681,
      "step": 303900
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0297790585975026e-05,
      "loss": 3.0591,
      "step": 304000
    },
    {
      "epoch": 0.19,
      "eval_loss": 2.7480616569519043,
      "eval_runtime": 110.045,
      "eval_samples_per_second": 90.872,
      "eval_steps_per_second": 5.679,
      "step": 304000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0294588536663466e-05,
      "loss": 3.037,
      "step": 304100
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0291386487351906e-05,
      "loss": 3.0559,
      "step": 304200
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.028818443804035e-05,
      "loss": 3.0471,
      "step": 304300
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0284982388728785e-05,
      "loss": 3.042,
      "step": 304400
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.028178033941723e-05,
      "loss": 3.0366,
      "step": 304500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.0278578290105665e-05,
      "loss": 3.0529,
      "step": 304600
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.027537624079411e-05,
      "loss": 3.058,
      "step": 304700
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.027217419148255e-05,
      "loss": 3.0439,
      "step": 304800
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.026897214217099e-05,
      "loss": 3.0362,
      "step": 304900
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.026577009285944e-05,
      "loss": 3.0785,
      "step": 305000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.74845814704895,
      "eval_runtime": 109.7783,
      "eval_samples_per_second": 91.093,
      "eval_steps_per_second": 5.693,
      "step": 305000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.026256804354787e-05,
      "loss": 3.0531,
      "step": 305100
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.025936599423632e-05,
      "loss": 3.0604,
      "step": 305200
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.025616394492475e-05,
      "loss": 3.0735,
      "step": 305300
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0252961895613196e-05,
      "loss": 3.0648,
      "step": 305400
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.024975984630163e-05,
      "loss": 3.0525,
      "step": 305500
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0246557796990076e-05,
      "loss": 3.0215,
      "step": 305600
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0243355747678515e-05,
      "loss": 3.0498,
      "step": 305700
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0240153698366955e-05,
      "loss": 3.0425,
      "step": 305800
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.02369516490554e-05,
      "loss": 3.0421,
      "step": 305900
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0233749599743835e-05,
      "loss": 3.0394,
      "step": 306000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.751927375793457,
      "eval_runtime": 110.2537,
      "eval_samples_per_second": 90.7,
      "eval_steps_per_second": 5.669,
      "step": 306000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.023054755043228e-05,
      "loss": 3.0441,
      "step": 306100
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0227345501120714e-05,
      "loss": 3.0538,
      "step": 306200
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.022414345180916e-05,
      "loss": 3.0555,
      "step": 306300
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.02209414024976e-05,
      "loss": 3.043,
      "step": 306400
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.021773935318604e-05,
      "loss": 3.0662,
      "step": 306500
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0214537303874487e-05,
      "loss": 3.0363,
      "step": 306600
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.021133525456292e-05,
      "loss": 3.0643,
      "step": 306700
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0208133205251366e-05,
      "loss": 3.0409,
      "step": 306800
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.02049311559398e-05,
      "loss": 3.0703,
      "step": 306900
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0201729106628246e-05,
      "loss": 3.0514,
      "step": 307000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.7502689361572266,
      "eval_runtime": 123.1227,
      "eval_samples_per_second": 81.22,
      "eval_steps_per_second": 5.076,
      "step": 307000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0198527057316685e-05,
      "loss": 3.0446,
      "step": 307100
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0195325008005125e-05,
      "loss": 3.0512,
      "step": 307200
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0192122958693565e-05,
      "loss": 3.0446,
      "step": 307300
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0188920909382004e-05,
      "loss": 3.0693,
      "step": 307400
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.018571886007045e-05,
      "loss": 3.0516,
      "step": 307500
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0182516810758884e-05,
      "loss": 3.0331,
      "step": 307600
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.017931476144733e-05,
      "loss": 3.0336,
      "step": 307700
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0176112712135763e-05,
      "loss": 3.0573,
      "step": 307800
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.017291066282421e-05,
      "loss": 3.0282,
      "step": 307900
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.016970861351265e-05,
      "loss": 3.0359,
      "step": 308000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.7488203048706055,
      "eval_runtime": 109.8355,
      "eval_samples_per_second": 91.045,
      "eval_steps_per_second": 5.69,
      "step": 308000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.016650656420109e-05,
      "loss": 3.054,
      "step": 308100
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0163304514889536e-05,
      "loss": 3.0554,
      "step": 308200
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.016010246557797e-05,
      "loss": 3.028,
      "step": 308300
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0156900416266415e-05,
      "loss": 2.9975,
      "step": 308400
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.015369836695485e-05,
      "loss": 3.0445,
      "step": 308500
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0150496317643295e-05,
      "loss": 3.0254,
      "step": 308600
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0147294268331735e-05,
      "loss": 3.0251,
      "step": 308700
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0144092219020174e-05,
      "loss": 3.0879,
      "step": 308800
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0140890169708614e-05,
      "loss": 3.0413,
      "step": 308900
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0137688120397054e-05,
      "loss": 3.0516,
      "step": 309000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.7512829303741455,
      "eval_runtime": 123.0687,
      "eval_samples_per_second": 81.255,
      "eval_steps_per_second": 5.078,
      "step": 309000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.01344860710855e-05,
      "loss": 3.0265,
      "step": 309100
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.013128402177393e-05,
      "loss": 3.0678,
      "step": 309200
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.012808197246238e-05,
      "loss": 3.0562,
      "step": 309300
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.012487992315082e-05,
      "loss": 3.055,
      "step": 309400
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.012167787383926e-05,
      "loss": 3.0346,
      "step": 309500
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.01184758245277e-05,
      "loss": 3.0292,
      "step": 309600
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.011527377521614e-05,
      "loss": 3.0296,
      "step": 309700
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0112071725904585e-05,
      "loss": 3.0341,
      "step": 309800
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.010886967659302e-05,
      "loss": 3.0316,
      "step": 309900
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0105667627281465e-05,
      "loss": 3.0422,
      "step": 310000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.747607469558716,
      "eval_runtime": 109.6832,
      "eval_samples_per_second": 91.172,
      "eval_steps_per_second": 5.698,
      "step": 310000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.01024655779699e-05,
      "loss": 3.0231,
      "step": 310100
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0099263528658344e-05,
      "loss": 3.0591,
      "step": 310200
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0096061479346784e-05,
      "loss": 3.0367,
      "step": 310300
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0092859430035224e-05,
      "loss": 3.0019,
      "step": 310400
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0089657380723663e-05,
      "loss": 3.0472,
      "step": 310500
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.00864553314121e-05,
      "loss": 3.0188,
      "step": 310600
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.008325328210055e-05,
      "loss": 3.0586,
      "step": 310700
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.008005123278898e-05,
      "loss": 3.0367,
      "step": 310800
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.007684918347743e-05,
      "loss": 3.0538,
      "step": 310900
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.007364713416587e-05,
      "loss": 3.0547,
      "step": 311000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.7486965656280518,
      "eval_runtime": 110.1682,
      "eval_samples_per_second": 90.77,
      "eval_steps_per_second": 5.673,
      "step": 311000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.007044508485431e-05,
      "loss": 3.0304,
      "step": 311100
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.006724303554275e-05,
      "loss": 3.0517,
      "step": 311200
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.006404098623119e-05,
      "loss": 3.0512,
      "step": 311300
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0060838936919635e-05,
      "loss": 3.0537,
      "step": 311400
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.005763688760807e-05,
      "loss": 3.031,
      "step": 311500
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0054434838296514e-05,
      "loss": 3.055,
      "step": 311600
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0051232788984954e-05,
      "loss": 3.0509,
      "step": 311700
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0048030739673394e-05,
      "loss": 3.0336,
      "step": 311800
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.004482869036183e-05,
      "loss": 3.0308,
      "step": 311900
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.004162664105027e-05,
      "loss": 3.0506,
      "step": 312000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.747952461242676,
      "eval_runtime": 109.6985,
      "eval_samples_per_second": 91.159,
      "eval_steps_per_second": 5.697,
      "step": 312000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.003842459173871e-05,
      "loss": 3.0391,
      "step": 312100
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.003522254242715e-05,
      "loss": 3.0361,
      "step": 312200
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.00320204931156e-05,
      "loss": 3.0444,
      "step": 312300
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.002881844380404e-05,
      "loss": 3.0263,
      "step": 312400
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.002561639449248e-05,
      "loss": 3.0648,
      "step": 312500
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.002241434518092e-05,
      "loss": 3.0461,
      "step": 312600
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.001921229586936e-05,
      "loss": 3.0229,
      "step": 312700
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.00160102465578e-05,
      "loss": 3.0637,
      "step": 312800
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.001280819724624e-05,
      "loss": 3.0567,
      "step": 312900
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.000960614793468e-05,
      "loss": 3.0302,
      "step": 313000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.7471508979797363,
      "eval_runtime": 110.0586,
      "eval_samples_per_second": 90.861,
      "eval_steps_per_second": 5.679,
      "step": 313000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.000640409862312e-05,
      "loss": 3.0715,
      "step": 313100
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.0003202049311564e-05,
      "loss": 3.043,
      "step": 313200
    },
    {
      "epoch": 0.2,
      "learning_rate": 4e-05,
      "loss": 3.0708,
      "step": 313300
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.999679795068844e-05,
      "loss": 3.0581,
      "step": 313400
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.999359590137688e-05,
      "loss": 3.0245,
      "step": 313500
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.999039385206532e-05,
      "loss": 3.0347,
      "step": 313600
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.998719180275376e-05,
      "loss": 3.0206,
      "step": 313700
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.99839897534422e-05,
      "loss": 3.0319,
      "step": 313800
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.998078770413065e-05,
      "loss": 3.0488,
      "step": 313900
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.997758565481909e-05,
      "loss": 3.065,
      "step": 314000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.742716073989868,
      "eval_runtime": 123.8071,
      "eval_samples_per_second": 80.771,
      "eval_steps_per_second": 5.048,
      "step": 314000
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.997438360550753e-05,
      "loss": 3.0398,
      "step": 314100
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.997118155619597e-05,
      "loss": 3.0392,
      "step": 314200
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.996797950688441e-05,
      "loss": 2.9751,
      "step": 314300
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.996477745757285e-05,
      "loss": 2.9938,
      "step": 314400
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.996157540826129e-05,
      "loss": 3.0629,
      "step": 314500
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.995837335894973e-05,
      "loss": 3.0415,
      "step": 314600
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.995517130963817e-05,
      "loss": 3.0437,
      "step": 314700
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.995196926032661e-05,
      "loss": 3.0227,
      "step": 314800
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.994876721101505e-05,
      "loss": 3.063,
      "step": 314900
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.994556516170349e-05,
      "loss": 3.0371,
      "step": 315000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.7442448139190674,
      "eval_runtime": 109.8568,
      "eval_samples_per_second": 91.028,
      "eval_steps_per_second": 5.689,
      "step": 315000
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.994236311239193e-05,
      "loss": 3.0066,
      "step": 315100
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.993916106308037e-05,
      "loss": 3.0544,
      "step": 315200
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.993595901376881e-05,
      "loss": 3.0209,
      "step": 315300
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.993275696445725e-05,
      "loss": 3.0394,
      "step": 315400
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.99295549151457e-05,
      "loss": 3.037,
      "step": 315500
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.992635286583414e-05,
      "loss": 3.0359,
      "step": 315600
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.992315081652258e-05,
      "loss": 3.0252,
      "step": 315700
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.991994876721102e-05,
      "loss": 3.0395,
      "step": 315800
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.991674671789946e-05,
      "loss": 3.048,
      "step": 315900
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9913544668587897e-05,
      "loss": 3.0469,
      "step": 316000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.7428243160247803,
      "eval_runtime": 123.7861,
      "eval_samples_per_second": 80.785,
      "eval_steps_per_second": 5.049,
      "step": 316000
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9910342619276336e-05,
      "loss": 3.063,
      "step": 316100
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9907140569964776e-05,
      "loss": 3.0227,
      "step": 316200
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.990393852065322e-05,
      "loss": 3.0473,
      "step": 316300
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.990073647134166e-05,
      "loss": 3.0507,
      "step": 316400
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.98975344220301e-05,
      "loss": 3.0284,
      "step": 316500
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.989433237271854e-05,
      "loss": 3.0236,
      "step": 316600
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.989113032340698e-05,
      "loss": 3.0455,
      "step": 316700
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.988792827409542e-05,
      "loss": 3.0376,
      "step": 316800
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.988472622478386e-05,
      "loss": 3.0514,
      "step": 316900
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.988152417547231e-05,
      "loss": 2.9914,
      "step": 317000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.7433390617370605,
      "eval_runtime": 109.5022,
      "eval_samples_per_second": 91.322,
      "eval_steps_per_second": 5.708,
      "step": 317000
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.987832212616075e-05,
      "loss": 3.0522,
      "step": 317100
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.987512007684919e-05,
      "loss": 3.0509,
      "step": 317200
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.987191802753763e-05,
      "loss": 3.0525,
      "step": 317300
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9868715978226066e-05,
      "loss": 3.0477,
      "step": 317400
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9865513928914506e-05,
      "loss": 3.0619,
      "step": 317500
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9862311879602946e-05,
      "loss": 3.0423,
      "step": 317600
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9859109830291386e-05,
      "loss": 3.0105,
      "step": 317700
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9855907780979825e-05,
      "loss": 3.0214,
      "step": 317800
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.985270573166827e-05,
      "loss": 3.0374,
      "step": 317900
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.984950368235671e-05,
      "loss": 3.0508,
      "step": 318000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.740947723388672,
      "eval_runtime": 109.6064,
      "eval_samples_per_second": 91.236,
      "eval_steps_per_second": 5.702,
      "step": 318000
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.984630163304515e-05,
      "loss": 3.0209,
      "step": 318100
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.984309958373359e-05,
      "loss": 3.0502,
      "step": 318200
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.983989753442203e-05,
      "loss": 3.0358,
      "step": 318300
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.983669548511047e-05,
      "loss": 3.0333,
      "step": 318400
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.983349343579891e-05,
      "loss": 3.0282,
      "step": 318500
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.983029138648736e-05,
      "loss": 3.0327,
      "step": 318600
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9827089337175797e-05,
      "loss": 3.0595,
      "step": 318700
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9823887287864236e-05,
      "loss": 3.0334,
      "step": 318800
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9820685238552676e-05,
      "loss": 3.0206,
      "step": 318900
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9817483189241116e-05,
      "loss": 3.0232,
      "step": 319000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.742826461791992,
      "eval_runtime": 109.5021,
      "eval_samples_per_second": 91.322,
      "eval_steps_per_second": 5.708,
      "step": 319000
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9814281139929556e-05,
      "loss": 3.0418,
      "step": 319100
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9811079090617995e-05,
      "loss": 3.0548,
      "step": 319200
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.980787704130644e-05,
      "loss": 3.0332,
      "step": 319300
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9804674991994875e-05,
      "loss": 3.0013,
      "step": 319400
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.980147294268332e-05,
      "loss": 3.0155,
      "step": 319500
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.979827089337176e-05,
      "loss": 3.0253,
      "step": 319600
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.97950688440602e-05,
      "loss": 3.0484,
      "step": 319700
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.979186679474864e-05,
      "loss": 3.0287,
      "step": 319800
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.978866474543708e-05,
      "loss": 3.0478,
      "step": 319900
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.978546269612553e-05,
      "loss": 3.0388,
      "step": 320000
    },
    {
      "epoch": 0.2,
      "eval_loss": 2.743786573410034,
      "eval_runtime": 109.6985,
      "eval_samples_per_second": 91.159,
      "eval_steps_per_second": 5.697,
      "step": 320000
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.978226064681396e-05,
      "loss": 3.0141,
      "step": 320100
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9779058597502406e-05,
      "loss": 3.0289,
      "step": 320200
    },
    {
      "epoch": 0.2,
      "learning_rate": 3.9775856548190846e-05,
      "loss": 3.0289,
      "step": 320300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9772654498879286e-05,
      "loss": 3.0314,
      "step": 320400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9769452449567725e-05,
      "loss": 3.0614,
      "step": 320500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9766250400256165e-05,
      "loss": 3.0401,
      "step": 320600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9763048350944605e-05,
      "loss": 3.0575,
      "step": 320700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9759846301633045e-05,
      "loss": 3.041,
      "step": 320800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.975664425232149e-05,
      "loss": 3.039,
      "step": 320900
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9753442203009924e-05,
      "loss": 3.0102,
      "step": 321000
    },
    {
      "epoch": 0.21,
      "eval_loss": 2.7440028190612793,
      "eval_runtime": 109.7684,
      "eval_samples_per_second": 91.101,
      "eval_steps_per_second": 5.694,
      "step": 321000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.975024015369837e-05,
      "loss": 3.0262,
      "step": 321100
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.974703810438681e-05,
      "loss": 3.0284,
      "step": 321200
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.974383605507525e-05,
      "loss": 3.0268,
      "step": 321300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.974063400576369e-05,
      "loss": 3.0239,
      "step": 321400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.973743195645213e-05,
      "loss": 3.0097,
      "step": 321500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9734229907140576e-05,
      "loss": 3.0229,
      "step": 321600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.973102785782901e-05,
      "loss": 3.0451,
      "step": 321700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9727825808517456e-05,
      "loss": 3.0195,
      "step": 321800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9724623759205895e-05,
      "loss": 3.0276,
      "step": 321900
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9721421709894335e-05,
      "loss": 3.0587,
      "step": 322000
    },
    {
      "epoch": 0.21,
      "eval_loss": 2.7399184703826904,
      "eval_runtime": 109.7135,
      "eval_samples_per_second": 91.146,
      "eval_steps_per_second": 5.697,
      "step": 322000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9718219660582775e-05,
      "loss": 3.0395,
      "step": 322100
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9715017611271215e-05,
      "loss": 3.0326,
      "step": 322200
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.971181556195966e-05,
      "loss": 3.0126,
      "step": 322300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9708613512648094e-05,
      "loss": 3.0306,
      "step": 322400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.970541146333654e-05,
      "loss": 3.0418,
      "step": 322500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9702209414024974e-05,
      "loss": 2.9924,
      "step": 322600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.969900736471342e-05,
      "loss": 3.0507,
      "step": 322700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.969580531540186e-05,
      "loss": 3.0297,
      "step": 322800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.96926032660903e-05,
      "loss": 3.039,
      "step": 322900
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.968940121677874e-05,
      "loss": 3.0266,
      "step": 323000
    },
    {
      "epoch": 0.21,
      "eval_loss": 2.7403018474578857,
      "eval_runtime": 110.0189,
      "eval_samples_per_second": 90.893,
      "eval_steps_per_second": 5.681,
      "step": 323000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.968619916746718e-05,
      "loss": 3.0243,
      "step": 323100
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9682997118155625e-05,
      "loss": 3.0245,
      "step": 323200
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.967979506884406e-05,
      "loss": 3.0251,
      "step": 323300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9676593019532505e-05,
      "loss": 3.0298,
      "step": 323400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9673390970220945e-05,
      "loss": 3.0308,
      "step": 323500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9670188920909384e-05,
      "loss": 3.032,
      "step": 323600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9666986871597824e-05,
      "loss": 3.0373,
      "step": 323700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9663784822286264e-05,
      "loss": 3.0531,
      "step": 323800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.966058277297471e-05,
      "loss": 3.0324,
      "step": 323900
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9657380723663143e-05,
      "loss": 3.0315,
      "step": 324000
    },
    {
      "epoch": 0.21,
      "eval_loss": 2.741628646850586,
      "eval_runtime": 110.1049,
      "eval_samples_per_second": 90.823,
      "eval_steps_per_second": 5.676,
      "step": 324000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.965417867435159e-05,
      "loss": 3.0351,
      "step": 324100
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.965097662504002e-05,
      "loss": 3.0175,
      "step": 324200
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.964777457572847e-05,
      "loss": 3.0293,
      "step": 324300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.964457252641691e-05,
      "loss": 3.0484,
      "step": 324400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.964137047710535e-05,
      "loss": 3.0577,
      "step": 324500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9638168427793795e-05,
      "loss": 3.0495,
      "step": 324600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.963496637848223e-05,
      "loss": 3.0412,
      "step": 324700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9631764329170675e-05,
      "loss": 3.0346,
      "step": 324800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.962856227985911e-05,
      "loss": 3.0308,
      "step": 324900
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9625360230547554e-05,
      "loss": 3.0306,
      "step": 325000
    },
    {
      "epoch": 0.21,
      "eval_loss": 2.7406821250915527,
      "eval_runtime": 109.9814,
      "eval_samples_per_second": 90.924,
      "eval_steps_per_second": 5.683,
      "step": 325000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9622158181235994e-05,
      "loss": 3.0321,
      "step": 325100
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9618956131924434e-05,
      "loss": 3.0282,
      "step": 325200
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9615754082612874e-05,
      "loss": 3.0171,
      "step": 325300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.961255203330131e-05,
      "loss": 3.0494,
      "step": 325400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.960934998398976e-05,
      "loss": 3.0304,
      "step": 325500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.960614793467819e-05,
      "loss": 3.0214,
      "step": 325600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.960294588536664e-05,
      "loss": 3.044,
      "step": 325700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.959974383605507e-05,
      "loss": 3.0405,
      "step": 325800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.959654178674352e-05,
      "loss": 3.0404,
      "step": 325900
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.959333973743196e-05,
      "loss": 3.0268,
      "step": 326000
    },
    {
      "epoch": 0.21,
      "eval_loss": 2.7411460876464844,
      "eval_runtime": 109.3622,
      "eval_samples_per_second": 91.439,
      "eval_steps_per_second": 5.715,
      "step": 326000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.95901376881204e-05,
      "loss": 3.0361,
      "step": 326100
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9586935638808845e-05,
      "loss": 3.0344,
      "step": 326200
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.958373358949728e-05,
      "loss": 3.0341,
      "step": 326300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9580531540185724e-05,
      "loss": 3.0462,
      "step": 326400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.957732949087416e-05,
      "loss": 3.0515,
      "step": 326500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9574127441562604e-05,
      "loss": 3.0495,
      "step": 326600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9570925392251043e-05,
      "loss": 3.0565,
      "step": 326700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.956772334293948e-05,
      "loss": 3.0476,
      "step": 326800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.956452129362793e-05,
      "loss": 3.023,
      "step": 326900
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.956131924431636e-05,
      "loss": 3.0351,
      "step": 327000
    },
    {
      "epoch": 0.21,
      "eval_loss": 2.7406504154205322,
      "eval_runtime": 109.75,
      "eval_samples_per_second": 91.116,
      "eval_steps_per_second": 5.695,
      "step": 327000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.955811719500481e-05,
      "loss": 3.049,
      "step": 327100
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.955491514569324e-05,
      "loss": 3.0334,
      "step": 327200
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.955171309638169e-05,
      "loss": 3.0524,
      "step": 327300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.954851104707012e-05,
      "loss": 3.0246,
      "step": 327400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.954530899775857e-05,
      "loss": 3.0575,
      "step": 327500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.954210694844701e-05,
      "loss": 3.0322,
      "step": 327600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.953890489913545e-05,
      "loss": 3.033,
      "step": 327700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9535702849823894e-05,
      "loss": 3.0141,
      "step": 327800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.953250080051233e-05,
      "loss": 3.0207,
      "step": 327900
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9529298751200774e-05,
      "loss": 3.0253,
      "step": 328000
    },
    {
      "epoch": 0.21,
      "eval_loss": 2.741025686264038,
      "eval_runtime": 110.0969,
      "eval_samples_per_second": 90.829,
      "eval_steps_per_second": 5.677,
      "step": 328000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9526096701889207e-05,
      "loss": 3.0295,
      "step": 328100
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.952289465257765e-05,
      "loss": 3.0275,
      "step": 328200
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.951969260326609e-05,
      "loss": 3.0497,
      "step": 328300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.951649055395453e-05,
      "loss": 3.036,
      "step": 328400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.951328850464298e-05,
      "loss": 3.0628,
      "step": 328500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.951008645533141e-05,
      "loss": 3.031,
      "step": 328600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.950688440601986e-05,
      "loss": 3.0393,
      "step": 328700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.950368235670829e-05,
      "loss": 3.0377,
      "step": 328800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.950048030739674e-05,
      "loss": 3.0407,
      "step": 328900
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.949727825808517e-05,
      "loss": 3.0533,
      "step": 329000
    },
    {
      "epoch": 0.21,
      "eval_loss": 2.740238666534424,
      "eval_runtime": 109.7571,
      "eval_samples_per_second": 91.11,
      "eval_steps_per_second": 5.694,
      "step": 329000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.949407620877362e-05,
      "loss": 3.016,
      "step": 329100
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.949087415946206e-05,
      "loss": 3.0625,
      "step": 329200
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.94876721101505e-05,
      "loss": 3.039,
      "step": 329300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9484470060838944e-05,
      "loss": 3.0466,
      "step": 329400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9481268011527376e-05,
      "loss": 3.0398,
      "step": 329500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.947806596221582e-05,
      "loss": 3.0573,
      "step": 329600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9474863912904256e-05,
      "loss": 3.055,
      "step": 329700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.94716618635927e-05,
      "loss": 3.0401,
      "step": 329800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.946845981428114e-05,
      "loss": 3.0246,
      "step": 329900
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.946525776496958e-05,
      "loss": 3.0473,
      "step": 330000
    },
    {
      "epoch": 0.21,
      "eval_loss": 2.7392821311950684,
      "eval_runtime": 109.9863,
      "eval_samples_per_second": 90.92,
      "eval_steps_per_second": 5.683,
      "step": 330000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.946205571565802e-05,
      "loss": 3.0211,
      "step": 330100
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.945885366634646e-05,
      "loss": 3.0349,
      "step": 330200
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.945565161703491e-05,
      "loss": 3.0195,
      "step": 330300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.945244956772334e-05,
      "loss": 3.0383,
      "step": 330400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.944924751841179e-05,
      "loss": 3.0242,
      "step": 330500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.944604546910022e-05,
      "loss": 3.0223,
      "step": 330600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.944284341978867e-05,
      "loss": 3.0383,
      "step": 330700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.943964137047711e-05,
      "loss": 3.0337,
      "step": 330800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9436439321165546e-05,
      "loss": 3.0207,
      "step": 330900
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.943323727185399e-05,
      "loss": 3.0253,
      "step": 331000
    },
    {
      "epoch": 0.21,
      "eval_loss": 2.739223003387451,
      "eval_runtime": 123.4128,
      "eval_samples_per_second": 81.029,
      "eval_steps_per_second": 5.064,
      "step": 331000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9430035222542426e-05,
      "loss": 3.0418,
      "step": 331100
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.942683317323087e-05,
      "loss": 3.027,
      "step": 331200
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9423631123919305e-05,
      "loss": 3.0441,
      "step": 331300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.942042907460775e-05,
      "loss": 3.0235,
      "step": 331400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.941722702529619e-05,
      "loss": 3.0245,
      "step": 331500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.941402497598463e-05,
      "loss": 3.0521,
      "step": 331600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.941082292667307e-05,
      "loss": 3.0462,
      "step": 331700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.940762087736151e-05,
      "loss": 3.0528,
      "step": 331800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.940441882804996e-05,
      "loss": 3.0346,
      "step": 331900
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.940121677873839e-05,
      "loss": 3.0435,
      "step": 332000
    },
    {
      "epoch": 0.21,
      "eval_loss": 2.7394144535064697,
      "eval_runtime": 110.0998,
      "eval_samples_per_second": 90.827,
      "eval_steps_per_second": 5.677,
      "step": 332000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.939801472942684e-05,
      "loss": 3.0265,
      "step": 332100
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9394812680115277e-05,
      "loss": 3.0165,
      "step": 332200
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9391610630803716e-05,
      "loss": 3.0493,
      "step": 332300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9388408581492156e-05,
      "loss": 3.0425,
      "step": 332400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9385206532180596e-05,
      "loss": 3.0203,
      "step": 332500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.938200448286904e-05,
      "loss": 3.0008,
      "step": 332600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9378802433557475e-05,
      "loss": 3.0576,
      "step": 332700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.937560038424592e-05,
      "loss": 3.0275,
      "step": 332800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9372398334934355e-05,
      "loss": 3.0554,
      "step": 332900
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.93691962856228e-05,
      "loss": 3.0476,
      "step": 333000
    },
    {
      "epoch": 0.21,
      "eval_loss": 2.739452838897705,
      "eval_runtime": 123.9343,
      "eval_samples_per_second": 80.688,
      "eval_steps_per_second": 5.043,
      "step": 333000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.936599423631124e-05,
      "loss": 3.0428,
      "step": 333100
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.936279218699968e-05,
      "loss": 3.0284,
      "step": 333200
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.935959013768812e-05,
      "loss": 3.0338,
      "step": 333300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.935638808837656e-05,
      "loss": 3.021,
      "step": 333400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.935318603906501e-05,
      "loss": 3.0654,
      "step": 333500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.934998398975344e-05,
      "loss": 3.0335,
      "step": 333600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9346781940441886e-05,
      "loss": 3.0281,
      "step": 333700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9343579891130326e-05,
      "loss": 3.0789,
      "step": 333800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9340377841818766e-05,
      "loss": 3.0486,
      "step": 333900
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9337175792507205e-05,
      "loss": 3.0381,
      "step": 334000
    },
    {
      "epoch": 0.21,
      "eval_loss": 2.7367255687713623,
      "eval_runtime": 109.4104,
      "eval_samples_per_second": 91.399,
      "eval_steps_per_second": 5.712,
      "step": 334000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9333973743195645e-05,
      "loss": 3.0046,
      "step": 334100
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.933077169388409e-05,
      "loss": 3.0545,
      "step": 334200
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9327569644572525e-05,
      "loss": 3.0281,
      "step": 334300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.932436759526097e-05,
      "loss": 3.0265,
      "step": 334400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.932116554594941e-05,
      "loss": 3.0584,
      "step": 334500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.931796349663785e-05,
      "loss": 3.0399,
      "step": 334600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.931476144732629e-05,
      "loss": 3.0546,
      "step": 334700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.931155939801473e-05,
      "loss": 3.0238,
      "step": 334800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.930835734870317e-05,
      "loss": 3.0184,
      "step": 334900
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.930515529939161e-05,
      "loss": 3.053,
      "step": 335000
    },
    {
      "epoch": 0.21,
      "eval_loss": 2.7367031574249268,
      "eval_runtime": 122.8203,
      "eval_samples_per_second": 81.42,
      "eval_steps_per_second": 5.089,
      "step": 335000
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9301953250080056e-05,
      "loss": 3.0315,
      "step": 335100
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.929875120076849e-05,
      "loss": 3.0341,
      "step": 335200
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9295549151456936e-05,
      "loss": 3.0266,
      "step": 335300
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9292347102145375e-05,
      "loss": 3.0242,
      "step": 335400
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9289145052833815e-05,
      "loss": 3.0375,
      "step": 335500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9285943003522255e-05,
      "loss": 3.0638,
      "step": 335600
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9282740954210694e-05,
      "loss": 3.029,
      "step": 335700
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.927953890489914e-05,
      "loss": 3.0279,
      "step": 335800
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9276336855587574e-05,
      "loss": 3.0153,
      "step": 335900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.927313480627602e-05,
      "loss": 3.031,
      "step": 336000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.7403688430786133,
      "eval_runtime": 109.609,
      "eval_samples_per_second": 91.233,
      "eval_steps_per_second": 5.702,
      "step": 336000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.926993275696446e-05,
      "loss": 3.0423,
      "step": 336100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.92667307076529e-05,
      "loss": 3.054,
      "step": 336200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.926352865834134e-05,
      "loss": 3.0323,
      "step": 336300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.926032660902978e-05,
      "loss": 3.021,
      "step": 336400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.925712455971822e-05,
      "loss": 3.038,
      "step": 336500
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.925392251040666e-05,
      "loss": 3.0345,
      "step": 336600
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9250720461095105e-05,
      "loss": 3.0222,
      "step": 336700
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9247518411783545e-05,
      "loss": 3.0278,
      "step": 336800
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9244316362471985e-05,
      "loss": 3.0492,
      "step": 336900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9241114313160425e-05,
      "loss": 3.0314,
      "step": 337000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.739436388015747,
      "eval_runtime": 124.2417,
      "eval_samples_per_second": 80.488,
      "eval_steps_per_second": 5.031,
      "step": 337000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9237912263848864e-05,
      "loss": 3.0287,
      "step": 337100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9234710214537304e-05,
      "loss": 3.016,
      "step": 337200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9231508165225744e-05,
      "loss": 2.9961,
      "step": 337300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.922830611591419e-05,
      "loss": 3.0515,
      "step": 337400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.922510406660262e-05,
      "loss": 3.0447,
      "step": 337500
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.922190201729107e-05,
      "loss": 3.0126,
      "step": 337600
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.921869996797951e-05,
      "loss": 3.0404,
      "step": 337700
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.921549791866795e-05,
      "loss": 3.0218,
      "step": 337800
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.921229586935639e-05,
      "loss": 3.0357,
      "step": 337900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.920909382004483e-05,
      "loss": 3.0522,
      "step": 338000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.7376296520233154,
      "eval_runtime": 109.5124,
      "eval_samples_per_second": 91.314,
      "eval_steps_per_second": 5.707,
      "step": 338000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.920589177073327e-05,
      "loss": 2.9912,
      "step": 338100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.920268972142171e-05,
      "loss": 3.0685,
      "step": 338200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9199487672110155e-05,
      "loss": 3.049,
      "step": 338300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9196285622798595e-05,
      "loss": 3.0727,
      "step": 338400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9193083573487034e-05,
      "loss": 3.0191,
      "step": 338500
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9189881524175474e-05,
      "loss": 3.0206,
      "step": 338600
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9186679474863914e-05,
      "loss": 3.0099,
      "step": 338700
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9183477425552353e-05,
      "loss": 3.0028,
      "step": 338800
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.918027537624079e-05,
      "loss": 3.0245,
      "step": 338900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.917707332692924e-05,
      "loss": 3.018,
      "step": 339000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.738215684890747,
      "eval_runtime": 108.785,
      "eval_samples_per_second": 91.924,
      "eval_steps_per_second": 5.745,
      "step": 339000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.917387127761768e-05,
      "loss": 3.0379,
      "step": 339100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.917066922830612e-05,
      "loss": 3.0334,
      "step": 339200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.916746717899456e-05,
      "loss": 3.0244,
      "step": 339300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9164265129683e-05,
      "loss": 3.0319,
      "step": 339400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.916106308037144e-05,
      "loss": 3.0354,
      "step": 339500
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.915786103105988e-05,
      "loss": 3.0064,
      "step": 339600
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.915465898174832e-05,
      "loss": 3.0374,
      "step": 339700
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9151456932436764e-05,
      "loss": 3.0492,
      "step": 339800
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9148254883125204e-05,
      "loss": 3.0407,
      "step": 339900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9145052833813644e-05,
      "loss": 3.0201,
      "step": 340000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.737098455429077,
      "eval_runtime": 110.1611,
      "eval_samples_per_second": 90.776,
      "eval_steps_per_second": 5.674,
      "step": 340000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9141850784502084e-05,
      "loss": 3.0196,
      "step": 340100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.913864873519052e-05,
      "loss": 3.0158,
      "step": 340200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.913544668587896e-05,
      "loss": 3.0443,
      "step": 340300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.91322446365674e-05,
      "loss": 3.0197,
      "step": 340400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.912904258725584e-05,
      "loss": 3.0211,
      "step": 340500
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.912584053794429e-05,
      "loss": 3.0289,
      "step": 340600
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.912263848863273e-05,
      "loss": 3.0334,
      "step": 340700
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.911943643932117e-05,
      "loss": 3.0204,
      "step": 340800
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.911623439000961e-05,
      "loss": 3.045,
      "step": 340900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.911303234069805e-05,
      "loss": 3.0368,
      "step": 341000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.7369439601898193,
      "eval_runtime": 110.0095,
      "eval_samples_per_second": 90.901,
      "eval_steps_per_second": 5.681,
      "step": 341000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.910983029138649e-05,
      "loss": 3.0151,
      "step": 341100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.910662824207493e-05,
      "loss": 3.0496,
      "step": 341200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.910342619276337e-05,
      "loss": 3.031,
      "step": 341300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9100224143451814e-05,
      "loss": 3.0432,
      "step": 341400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9097022094140254e-05,
      "loss": 3.018,
      "step": 341500
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.909382004482869e-05,
      "loss": 3.0418,
      "step": 341600
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.909061799551713e-05,
      "loss": 3.0491,
      "step": 341700
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.908741594620557e-05,
      "loss": 3.0153,
      "step": 341800
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.908421389689401e-05,
      "loss": 3.0359,
      "step": 341900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.908101184758245e-05,
      "loss": 3.0141,
      "step": 342000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.7398972511291504,
      "eval_runtime": 109.9369,
      "eval_samples_per_second": 90.961,
      "eval_steps_per_second": 5.685,
      "step": 342000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.90778097982709e-05,
      "loss": 3.0233,
      "step": 342100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.907460774895934e-05,
      "loss": 3.0519,
      "step": 342200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.907140569964778e-05,
      "loss": 3.0227,
      "step": 342300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.906820365033622e-05,
      "loss": 3.0649,
      "step": 342400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.906500160102466e-05,
      "loss": 3.0151,
      "step": 342500
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.90617995517131e-05,
      "loss": 3.0205,
      "step": 342600
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.905859750240154e-05,
      "loss": 3.0401,
      "step": 342700
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.905539545308998e-05,
      "loss": 3.0241,
      "step": 342800
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.905219340377842e-05,
      "loss": 3.0133,
      "step": 342900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.904899135446686e-05,
      "loss": 3.0352,
      "step": 343000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.7366628646850586,
      "eval_runtime": 109.4831,
      "eval_samples_per_second": 91.338,
      "eval_steps_per_second": 5.709,
      "step": 343000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.90457893051553e-05,
      "loss": 3.0022,
      "step": 343100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.904258725584374e-05,
      "loss": 3.029,
      "step": 343200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.903938520653218e-05,
      "loss": 3.0372,
      "step": 343300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.903618315722062e-05,
      "loss": 3.0331,
      "step": 343400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.903298110790906e-05,
      "loss": 3.0186,
      "step": 343500
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.90297790585975e-05,
      "loss": 3.0069,
      "step": 343600
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.902657700928595e-05,
      "loss": 3.0283,
      "step": 343700
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.902337495997439e-05,
      "loss": 3.0288,
      "step": 343800
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.902017291066283e-05,
      "loss": 3.0269,
      "step": 343900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.901697086135127e-05,
      "loss": 3.0422,
      "step": 344000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.7353785037994385,
      "eval_runtime": 109.5756,
      "eval_samples_per_second": 91.261,
      "eval_steps_per_second": 5.704,
      "step": 344000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.901376881203971e-05,
      "loss": 3.0282,
      "step": 344100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.901056676272815e-05,
      "loss": 3.0032,
      "step": 344200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9007364713416587e-05,
      "loss": 3.0575,
      "step": 344300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.900416266410503e-05,
      "loss": 3.0242,
      "step": 344400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.9000960614793466e-05,
      "loss": 3.0352,
      "step": 344500
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.899775856548191e-05,
      "loss": 3.0368,
      "step": 344600
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.899455651617035e-05,
      "loss": 3.0507,
      "step": 344700
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.899135446685879e-05,
      "loss": 3.0243,
      "step": 344800
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.898815241754723e-05,
      "loss": 3.0508,
      "step": 344900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.898495036823567e-05,
      "loss": 3.0478,
      "step": 345000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.734189510345459,
      "eval_runtime": 109.5438,
      "eval_samples_per_second": 91.288,
      "eval_steps_per_second": 5.705,
      "step": 345000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.898174831892411e-05,
      "loss": 3.0397,
      "step": 345100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.897854626961255e-05,
      "loss": 3.0324,
      "step": 345200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8975344220301e-05,
      "loss": 3.0296,
      "step": 345300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.897214217098944e-05,
      "loss": 3.0423,
      "step": 345400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.896894012167788e-05,
      "loss": 3.0136,
      "step": 345500
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.896573807236632e-05,
      "loss": 3.0191,
      "step": 345600
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8962536023054756e-05,
      "loss": 3.0127,
      "step": 345700
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8959333973743196e-05,
      "loss": 3.0232,
      "step": 345800
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8956131924431636e-05,
      "loss": 3.0303,
      "step": 345900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.895292987512008e-05,
      "loss": 3.0246,
      "step": 346000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.738823890686035,
      "eval_runtime": 110.0813,
      "eval_samples_per_second": 90.842,
      "eval_steps_per_second": 5.678,
      "step": 346000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8949727825808515e-05,
      "loss": 3.0101,
      "step": 346100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.894652577649696e-05,
      "loss": 3.0348,
      "step": 346200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.89433237271854e-05,
      "loss": 3.0353,
      "step": 346300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.894012167787384e-05,
      "loss": 3.0097,
      "step": 346400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.893691962856228e-05,
      "loss": 3.0394,
      "step": 346500
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.893371757925072e-05,
      "loss": 3.0528,
      "step": 346600
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.893051552993917e-05,
      "loss": 3.0303,
      "step": 346700
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.89273134806276e-05,
      "loss": 3.0278,
      "step": 346800
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.892411143131605e-05,
      "loss": 3.0324,
      "step": 346900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8920909382004487e-05,
      "loss": 3.0401,
      "step": 347000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.735464334487915,
      "eval_runtime": 109.9819,
      "eval_samples_per_second": 90.924,
      "eval_steps_per_second": 5.683,
      "step": 347000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8917707332692926e-05,
      "loss": 3.015,
      "step": 347100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8914505283381366e-05,
      "loss": 3.0067,
      "step": 347200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8911303234069806e-05,
      "loss": 3.0384,
      "step": 347300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8908101184758246e-05,
      "loss": 3.0197,
      "step": 347400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8904899135446685e-05,
      "loss": 3.0339,
      "step": 347500
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.890169708613513e-05,
      "loss": 3.0541,
      "step": 347600
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8898495036823565e-05,
      "loss": 3.0419,
      "step": 347700
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.889529298751201e-05,
      "loss": 3.036,
      "step": 347800
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.889209093820045e-05,
      "loss": 3.0177,
      "step": 347900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.888888888888889e-05,
      "loss": 3.0361,
      "step": 348000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.7359886169433594,
      "eval_runtime": 109.8939,
      "eval_samples_per_second": 90.997,
      "eval_steps_per_second": 5.687,
      "step": 348000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.888568683957733e-05,
      "loss": 3.0359,
      "step": 348100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.888248479026577e-05,
      "loss": 3.0109,
      "step": 348200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.887928274095422e-05,
      "loss": 3.0612,
      "step": 348300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.887608069164265e-05,
      "loss": 3.006,
      "step": 348400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8872878642331096e-05,
      "loss": 3.014,
      "step": 348500
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8869676593019536e-05,
      "loss": 3.0401,
      "step": 348600
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8866474543707976e-05,
      "loss": 3.0647,
      "step": 348700
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8863272494396415e-05,
      "loss": 3.0322,
      "step": 348800
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8860070445084855e-05,
      "loss": 3.0127,
      "step": 348900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.88568683957733e-05,
      "loss": 3.0202,
      "step": 349000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.734914541244507,
      "eval_runtime": 109.8452,
      "eval_samples_per_second": 91.037,
      "eval_steps_per_second": 5.69,
      "step": 349000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8853666346461735e-05,
      "loss": 3.044,
      "step": 349100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.885046429715018e-05,
      "loss": 3.0198,
      "step": 349200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8847262247838614e-05,
      "loss": 3.0501,
      "step": 349300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.884406019852706e-05,
      "loss": 3.0419,
      "step": 349400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.88408581492155e-05,
      "loss": 3.0389,
      "step": 349500
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.883765609990394e-05,
      "loss": 3.0277,
      "step": 349600
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.883445405059239e-05,
      "loss": 3.0145,
      "step": 349700
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.883125200128082e-05,
      "loss": 3.0216,
      "step": 349800
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8828049951969266e-05,
      "loss": 3.0244,
      "step": 349900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.88248479026577e-05,
      "loss": 3.0444,
      "step": 350000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.732851982116699,
      "eval_runtime": 109.953,
      "eval_samples_per_second": 90.948,
      "eval_steps_per_second": 5.684,
      "step": 350000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8821645853346146e-05,
      "loss": 3.0097,
      "step": 350100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8818443804034585e-05,
      "loss": 3.026,
      "step": 350200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8815241754723025e-05,
      "loss": 3.0095,
      "step": 350300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8812039705411465e-05,
      "loss": 3.03,
      "step": 350400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8808837656099905e-05,
      "loss": 3.0065,
      "step": 350500
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.880563560678835e-05,
      "loss": 3.03,
      "step": 350600
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8802433557476784e-05,
      "loss": 3.0477,
      "step": 350700
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.879923150816523e-05,
      "loss": 3.007,
      "step": 350800
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8796029458853664e-05,
      "loss": 3.0422,
      "step": 350900
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.879282740954211e-05,
      "loss": 3.0402,
      "step": 351000
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.7316129207611084,
      "eval_runtime": 109.5844,
      "eval_samples_per_second": 91.254,
      "eval_steps_per_second": 5.703,
      "step": 351000
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.878962536023055e-05,
      "loss": 3.0084,
      "step": 351100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.878642331091899e-05,
      "loss": 3.0296,
      "step": 351200
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8783221261607436e-05,
      "loss": 3.0609,
      "step": 351300
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.878001921229587e-05,
      "loss": 3.0424,
      "step": 351400
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.8776817162984315e-05,
      "loss": 3.005,
      "step": 351500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.877361511367275e-05,
      "loss": 3.0067,
      "step": 351600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8770413064361195e-05,
      "loss": 3.003,
      "step": 351700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8767211015049635e-05,
      "loss": 3.0576,
      "step": 351800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8764008965738074e-05,
      "loss": 2.9774,
      "step": 351900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8760806916426514e-05,
      "loss": 3.0195,
      "step": 352000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.7340450286865234,
      "eval_runtime": 109.3684,
      "eval_samples_per_second": 91.434,
      "eval_steps_per_second": 5.715,
      "step": 352000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8757604867114954e-05,
      "loss": 3.0161,
      "step": 352100
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.87544028178034e-05,
      "loss": 3.0212,
      "step": 352200
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8751200768491833e-05,
      "loss": 3.0321,
      "step": 352300
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.874799871918028e-05,
      "loss": 3.0208,
      "step": 352400
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.874479666986871e-05,
      "loss": 3.0539,
      "step": 352500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.874159462055716e-05,
      "loss": 3.0269,
      "step": 352600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.87383925712456e-05,
      "loss": 3.0101,
      "step": 352700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.873519052193404e-05,
      "loss": 3.0371,
      "step": 352800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8731988472622485e-05,
      "loss": 3.0356,
      "step": 352900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.872878642331092e-05,
      "loss": 3.0234,
      "step": 353000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.731426477432251,
      "eval_runtime": 109.6352,
      "eval_samples_per_second": 91.212,
      "eval_steps_per_second": 5.701,
      "step": 353000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8725584373999365e-05,
      "loss": 3.0068,
      "step": 353100
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.87223823246878e-05,
      "loss": 3.0226,
      "step": 353200
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8719180275376244e-05,
      "loss": 3.0201,
      "step": 353300
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8715978226064684e-05,
      "loss": 3.0031,
      "step": 353400
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8712776176753124e-05,
      "loss": 3.0424,
      "step": 353500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8709574127441564e-05,
      "loss": 3.02,
      "step": 353600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.870637207813e-05,
      "loss": 3.0261,
      "step": 353700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.870317002881845e-05,
      "loss": 3.0075,
      "step": 353800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.869996797950688e-05,
      "loss": 3.0247,
      "step": 353900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.869676593019533e-05,
      "loss": 2.9934,
      "step": 354000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.7345030307769775,
      "eval_runtime": 110.3152,
      "eval_samples_per_second": 90.649,
      "eval_steps_per_second": 5.666,
      "step": 354000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.869356388088376e-05,
      "loss": 3.0281,
      "step": 354100
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.869036183157221e-05,
      "loss": 3.0504,
      "step": 354200
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.868715978226065e-05,
      "loss": 3.027,
      "step": 354300
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.868395773294909e-05,
      "loss": 3.0308,
      "step": 354400
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8680755683637535e-05,
      "loss": 3.0212,
      "step": 354500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.867755363432597e-05,
      "loss": 3.0403,
      "step": 354600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8674351585014414e-05,
      "loss": 3.0128,
      "step": 354700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.867114953570285e-05,
      "loss": 3.0343,
      "step": 354800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8667947486391294e-05,
      "loss": 3.0258,
      "step": 354900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8664745437079733e-05,
      "loss": 3.0321,
      "step": 355000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.730273962020874,
      "eval_runtime": 109.737,
      "eval_samples_per_second": 91.127,
      "eval_steps_per_second": 5.695,
      "step": 355000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.866154338776817e-05,
      "loss": 3.0252,
      "step": 355100
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.865834133845661e-05,
      "loss": 3.0267,
      "step": 355200
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.865513928914505e-05,
      "loss": 3.0332,
      "step": 355300
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.86519372398335e-05,
      "loss": 3.0139,
      "step": 355400
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.864873519052193e-05,
      "loss": 3.0287,
      "step": 355500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.864553314121038e-05,
      "loss": 3.0336,
      "step": 355600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.864233109189881e-05,
      "loss": 3.0124,
      "step": 355700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.863912904258726e-05,
      "loss": 3.0218,
      "step": 355800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.86359269932757e-05,
      "loss": 3.0228,
      "step": 355900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.863272494396414e-05,
      "loss": 3.0118,
      "step": 356000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.732738733291626,
      "eval_runtime": 109.4481,
      "eval_samples_per_second": 91.368,
      "eval_steps_per_second": 5.71,
      "step": 356000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8629522894652584e-05,
      "loss": 3.0428,
      "step": 356100
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.862632084534102e-05,
      "loss": 3.018,
      "step": 356200
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8623118796029464e-05,
      "loss": 3.0229,
      "step": 356300
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8619916746717897e-05,
      "loss": 3.0301,
      "step": 356400
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.861671469740634e-05,
      "loss": 3.0101,
      "step": 356500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.861351264809478e-05,
      "loss": 3.0297,
      "step": 356600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.861031059878322e-05,
      "loss": 3.0385,
      "step": 356700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.860710854947166e-05,
      "loss": 3.0191,
      "step": 356800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.86039065001601e-05,
      "loss": 3.0196,
      "step": 356900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.860070445084855e-05,
      "loss": 3.0356,
      "step": 357000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.732578992843628,
      "eval_runtime": 109.5387,
      "eval_samples_per_second": 91.292,
      "eval_steps_per_second": 5.706,
      "step": 357000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.859750240153698e-05,
      "loss": 3.0249,
      "step": 357100
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.859430035222543e-05,
      "loss": 3.0411,
      "step": 357200
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.859109830291386e-05,
      "loss": 3.023,
      "step": 357300
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.858789625360231e-05,
      "loss": 3.0259,
      "step": 357400
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.858469420429075e-05,
      "loss": 3.0198,
      "step": 357500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.858149215497919e-05,
      "loss": 3.029,
      "step": 357600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8578290105667633e-05,
      "loss": 3.0409,
      "step": 357700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8575088056356066e-05,
      "loss": 3.0429,
      "step": 357800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.857188600704451e-05,
      "loss": 3.0108,
      "step": 357900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8568683957732946e-05,
      "loss": 3.0258,
      "step": 358000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.732177734375,
      "eval_runtime": 109.7432,
      "eval_samples_per_second": 91.122,
      "eval_steps_per_second": 5.695,
      "step": 358000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.856548190842139e-05,
      "loss": 3.0336,
      "step": 358100
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.856227985910983e-05,
      "loss": 3.0114,
      "step": 358200
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.855907780979827e-05,
      "loss": 3.0145,
      "step": 358300
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.855587576048671e-05,
      "loss": 3.0319,
      "step": 358400
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.855267371117515e-05,
      "loss": 3.0116,
      "step": 358500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.85494716618636e-05,
      "loss": 3.0047,
      "step": 358600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.854626961255203e-05,
      "loss": 3.0303,
      "step": 358700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.854306756324048e-05,
      "loss": 3.0248,
      "step": 358800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.853986551392892e-05,
      "loss": 3.0192,
      "step": 358900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.853666346461736e-05,
      "loss": 3.0219,
      "step": 359000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.731646776199341,
      "eval_runtime": 109.9878,
      "eval_samples_per_second": 90.919,
      "eval_steps_per_second": 5.682,
      "step": 359000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.85334614153058e-05,
      "loss": 3.0062,
      "step": 359100
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8530259365994236e-05,
      "loss": 3.0257,
      "step": 359200
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.852705731668268e-05,
      "loss": 3.0188,
      "step": 359300
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8523855267371116e-05,
      "loss": 3.0315,
      "step": 359400
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.852065321805956e-05,
      "loss": 3.0434,
      "step": 359500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8517451168748e-05,
      "loss": 3.015,
      "step": 359600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.851424911943644e-05,
      "loss": 3.0245,
      "step": 359700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.851104707012488e-05,
      "loss": 3.0466,
      "step": 359800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.850784502081332e-05,
      "loss": 3.0274,
      "step": 359900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.850464297150176e-05,
      "loss": 3.0187,
      "step": 360000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.729813814163208,
      "eval_runtime": 109.5297,
      "eval_samples_per_second": 91.299,
      "eval_steps_per_second": 5.706,
      "step": 360000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.85014409221902e-05,
      "loss": 3.0378,
      "step": 360100
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.849823887287865e-05,
      "loss": 3.0016,
      "step": 360200
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.849503682356708e-05,
      "loss": 3.0147,
      "step": 360300
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.849183477425553e-05,
      "loss": 3.0138,
      "step": 360400
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8488632724943967e-05,
      "loss": 3.0147,
      "step": 360500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8485430675632406e-05,
      "loss": 3.0282,
      "step": 360600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8482228626320846e-05,
      "loss": 3.0095,
      "step": 360700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8479026577009286e-05,
      "loss": 3.0213,
      "step": 360800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.847582452769773e-05,
      "loss": 3.0258,
      "step": 360900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8472622478386165e-05,
      "loss": 3.0287,
      "step": 361000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.731163263320923,
      "eval_runtime": 109.3475,
      "eval_samples_per_second": 91.452,
      "eval_steps_per_second": 5.716,
      "step": 361000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.846942042907461e-05,
      "loss": 3.0014,
      "step": 361100
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.846621837976305e-05,
      "loss": 2.9955,
      "step": 361200
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.846301633045149e-05,
      "loss": 3.0031,
      "step": 361300
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.845981428113993e-05,
      "loss": 3.0118,
      "step": 361400
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.845661223182837e-05,
      "loss": 2.9778,
      "step": 361500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.845341018251681e-05,
      "loss": 3.0205,
      "step": 361600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.845020813320525e-05,
      "loss": 3.0332,
      "step": 361700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.84470060838937e-05,
      "loss": 3.038,
      "step": 361800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8443804034582136e-05,
      "loss": 3.0256,
      "step": 361900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8440601985270576e-05,
      "loss": 3.0139,
      "step": 362000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.731630325317383,
      "eval_runtime": 109.6822,
      "eval_samples_per_second": 91.172,
      "eval_steps_per_second": 5.698,
      "step": 362000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8437399935959016e-05,
      "loss": 2.9692,
      "step": 362100
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8434197886647456e-05,
      "loss": 3.0222,
      "step": 362200
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8430995837335895e-05,
      "loss": 3.0144,
      "step": 362300
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8427793788024335e-05,
      "loss": 3.0087,
      "step": 362400
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.842459173871278e-05,
      "loss": 3.0492,
      "step": 362500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8421389689401215e-05,
      "loss": 3.0074,
      "step": 362600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.841818764008966e-05,
      "loss": 2.9948,
      "step": 362700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.84149855907781e-05,
      "loss": 3.0031,
      "step": 362800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.841178354146654e-05,
      "loss": 3.0114,
      "step": 362900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.840858149215498e-05,
      "loss": 3.0086,
      "step": 363000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.7331531047821045,
      "eval_runtime": 109.5667,
      "eval_samples_per_second": 91.269,
      "eval_steps_per_second": 5.704,
      "step": 363000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.840537944284342e-05,
      "loss": 3.0302,
      "step": 363100
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.840217739353186e-05,
      "loss": 3.0162,
      "step": 363200
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.83989753442203e-05,
      "loss": 3.0233,
      "step": 363300
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8395773294908746e-05,
      "loss": 3.0149,
      "step": 363400
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8392571245597186e-05,
      "loss": 3.0091,
      "step": 363500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8389369196285626e-05,
      "loss": 3.0074,
      "step": 363600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8386167146974065e-05,
      "loss": 3.045,
      "step": 363700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8382965097662505e-05,
      "loss": 3.0291,
      "step": 363800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8379763048350945e-05,
      "loss": 3.0167,
      "step": 363900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8376560999039384e-05,
      "loss": 3.0216,
      "step": 364000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.7286360263824463,
      "eval_runtime": 109.42,
      "eval_samples_per_second": 91.391,
      "eval_steps_per_second": 5.712,
      "step": 364000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.837335894972783e-05,
      "loss": 3.0138,
      "step": 364100
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.837015690041627e-05,
      "loss": 3.0198,
      "step": 364200
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.836695485110471e-05,
      "loss": 3.01,
      "step": 364300
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.836375280179315e-05,
      "loss": 3.0083,
      "step": 364400
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.836055075248159e-05,
      "loss": 2.9834,
      "step": 364500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.835734870317003e-05,
      "loss": 3.0009,
      "step": 364600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.835414665385847e-05,
      "loss": 3.0172,
      "step": 364700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.835094460454691e-05,
      "loss": 3.0178,
      "step": 364800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.834774255523535e-05,
      "loss": 3.0085,
      "step": 364900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8344540505923795e-05,
      "loss": 3.0106,
      "step": 365000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.731403112411499,
      "eval_runtime": 109.1783,
      "eval_samples_per_second": 91.593,
      "eval_steps_per_second": 5.725,
      "step": 365000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8341338456612235e-05,
      "loss": 3.0137,
      "step": 365100
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8338136407300675e-05,
      "loss": 3.0162,
      "step": 365200
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8334934357989115e-05,
      "loss": 3.0098,
      "step": 365300
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8331732308677554e-05,
      "loss": 3.0153,
      "step": 365400
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8328530259365994e-05,
      "loss": 3.0292,
      "step": 365500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8325328210054434e-05,
      "loss": 3.0261,
      "step": 365600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.832212616074288e-05,
      "loss": 3.032,
      "step": 365700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.831892411143132e-05,
      "loss": 3.0359,
      "step": 365800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.831572206211976e-05,
      "loss": 3.0171,
      "step": 365900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.83125200128082e-05,
      "loss": 3.0235,
      "step": 366000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.7287421226501465,
      "eval_runtime": 109.4268,
      "eval_samples_per_second": 91.385,
      "eval_steps_per_second": 5.712,
      "step": 366000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.830931796349664e-05,
      "loss": 3.0219,
      "step": 366100
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.830611591418508e-05,
      "loss": 3.0212,
      "step": 366200
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.830291386487352e-05,
      "loss": 3.0007,
      "step": 366300
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.829971181556196e-05,
      "loss": 3.0389,
      "step": 366400
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8296509766250405e-05,
      "loss": 3.0042,
      "step": 366500
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8293307716938845e-05,
      "loss": 3.0047,
      "step": 366600
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8290105667627285e-05,
      "loss": 3.0307,
      "step": 366700
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8286903618315724e-05,
      "loss": 3.0086,
      "step": 366800
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8283701569004164e-05,
      "loss": 3.0445,
      "step": 366900
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8280499519692604e-05,
      "loss": 3.015,
      "step": 367000
    },
    {
      "epoch": 0.23,
      "eval_loss": 2.7265288829803467,
      "eval_runtime": 110.0199,
      "eval_samples_per_second": 90.893,
      "eval_steps_per_second": 5.681,
      "step": 367000
    },
    {
      "epoch": 0.23,
      "learning_rate": 3.8277297470381043e-05,
      "loss": 3.0238,
      "step": 367100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.827409542106948e-05,
      "loss": 3.0196,
      "step": 367200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.827089337175793e-05,
      "loss": 3.0278,
      "step": 367300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.826769132244637e-05,
      "loss": 3.0226,
      "step": 367400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.826448927313481e-05,
      "loss": 3.0106,
      "step": 367500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.826128722382325e-05,
      "loss": 2.9979,
      "step": 367600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.825808517451169e-05,
      "loss": 3.0226,
      "step": 367700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.825488312520013e-05,
      "loss": 3.0114,
      "step": 367800
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.825168107588857e-05,
      "loss": 3.006,
      "step": 367900
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.824847902657701e-05,
      "loss": 3.0042,
      "step": 368000
    },
    {
      "epoch": 0.24,
      "eval_loss": 2.730778217315674,
      "eval_runtime": 109.3698,
      "eval_samples_per_second": 91.433,
      "eval_steps_per_second": 5.715,
      "step": 368000
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8245276977265454e-05,
      "loss": 3.0238,
      "step": 368100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8242074927953894e-05,
      "loss": 3.0092,
      "step": 368200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8238872878642334e-05,
      "loss": 3.028,
      "step": 368300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8235670829330774e-05,
      "loss": 3.0185,
      "step": 368400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.823246878001921e-05,
      "loss": 3.0239,
      "step": 368500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.822926673070765e-05,
      "loss": 3.0339,
      "step": 368600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.822606468139609e-05,
      "loss": 3.0142,
      "step": 368700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.822286263208454e-05,
      "loss": 3.0293,
      "step": 368800
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.821966058277298e-05,
      "loss": 3.0224,
      "step": 368900
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.821645853346142e-05,
      "loss": 3.0096,
      "step": 369000
    },
    {
      "epoch": 0.24,
      "eval_loss": 2.7294204235076904,
      "eval_runtime": 109.7406,
      "eval_samples_per_second": 91.124,
      "eval_steps_per_second": 5.695,
      "step": 369000
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.821325648414986e-05,
      "loss": 3.0329,
      "step": 369100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.82100544348383e-05,
      "loss": 3.0239,
      "step": 369200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.820685238552674e-05,
      "loss": 3.013,
      "step": 369300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.820365033621518e-05,
      "loss": 2.9961,
      "step": 369400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8200448286903624e-05,
      "loss": 3.0072,
      "step": 369500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.819724623759206e-05,
      "loss": 3.008,
      "step": 369600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8194044188280504e-05,
      "loss": 3.0152,
      "step": 369700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8190842138968944e-05,
      "loss": 3.0202,
      "step": 369800
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.818764008965738e-05,
      "loss": 3.0266,
      "step": 369900
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.818443804034582e-05,
      "loss": 3.0084,
      "step": 370000
    },
    {
      "epoch": 0.24,
      "eval_loss": 2.7294363975524902,
      "eval_runtime": 109.4129,
      "eval_samples_per_second": 91.397,
      "eval_steps_per_second": 5.712,
      "step": 370000
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.818123599103426e-05,
      "loss": 3.0159,
      "step": 370100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.81780339417227e-05,
      "loss": 3.0119,
      "step": 370200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.817483189241114e-05,
      "loss": 3.0252,
      "step": 370300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.817162984309959e-05,
      "loss": 3.0207,
      "step": 370400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.816842779378803e-05,
      "loss": 3.0067,
      "step": 370500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.816522574447647e-05,
      "loss": 3.0398,
      "step": 370600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.816202369516491e-05,
      "loss": 2.9958,
      "step": 370700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.815882164585335e-05,
      "loss": 3.0079,
      "step": 370800
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.815561959654179e-05,
      "loss": 3.0235,
      "step": 370900
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.815241754723023e-05,
      "loss": 2.9961,
      "step": 371000
    },
    {
      "epoch": 0.24,
      "eval_loss": 2.7283895015716553,
      "eval_runtime": 109.7333,
      "eval_samples_per_second": 91.13,
      "eval_steps_per_second": 5.696,
      "step": 371000
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8149215497918674e-05,
      "loss": 3.0183,
      "step": 371100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.814601344860711e-05,
      "loss": 3.0292,
      "step": 371200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.814281139929555e-05,
      "loss": 3.0033,
      "step": 371300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.813960934998399e-05,
      "loss": 3.0057,
      "step": 371400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.813640730067243e-05,
      "loss": 3.0193,
      "step": 371500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.813320525136087e-05,
      "loss": 3.0168,
      "step": 371600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.813000320204931e-05,
      "loss": 3.0186,
      "step": 371700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.812680115273776e-05,
      "loss": 3.0355,
      "step": 371800
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.812359910342619e-05,
      "loss": 3.0218,
      "step": 371900
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.812039705411464e-05,
      "loss": 2.9924,
      "step": 372000
    },
    {
      "epoch": 0.24,
      "eval_loss": 2.7280290126800537,
      "eval_runtime": 109.1823,
      "eval_samples_per_second": 91.59,
      "eval_steps_per_second": 5.724,
      "step": 372000
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.811719500480308e-05,
      "loss": 3.0018,
      "step": 372100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.811399295549152e-05,
      "loss": 3.0267,
      "step": 372200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.811079090617996e-05,
      "loss": 3.0044,
      "step": 372300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.81075888568684e-05,
      "loss": 3.0418,
      "step": 372400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.810438680755684e-05,
      "loss": 3.0343,
      "step": 372500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8101184758245277e-05,
      "loss": 3.0221,
      "step": 372600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.809798270893372e-05,
      "loss": 3.0031,
      "step": 372700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8094780659622156e-05,
      "loss": 3.0058,
      "step": 372800
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.80915786103106e-05,
      "loss": 3.0128,
      "step": 372900
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.808837656099904e-05,
      "loss": 3.0137,
      "step": 373000
    },
    {
      "epoch": 0.24,
      "eval_loss": 2.7276806831359863,
      "eval_runtime": 109.2957,
      "eval_samples_per_second": 91.495,
      "eval_steps_per_second": 5.718,
      "step": 373000
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.808517451168748e-05,
      "loss": 3.0038,
      "step": 373100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.808197246237592e-05,
      "loss": 3.0358,
      "step": 373200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.807877041306436e-05,
      "loss": 3.0063,
      "step": 373300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.807556836375281e-05,
      "loss": 3.0145,
      "step": 373400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.807236631444124e-05,
      "loss": 3.0346,
      "step": 373500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.806916426512969e-05,
      "loss": 2.9972,
      "step": 373600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.806596221581813e-05,
      "loss": 3.0496,
      "step": 373700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.806276016650657e-05,
      "loss": 3.037,
      "step": 373800
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.805955811719501e-05,
      "loss": 3.0175,
      "step": 373900
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8056356067883446e-05,
      "loss": 3.014,
      "step": 374000
    },
    {
      "epoch": 0.24,
      "eval_loss": 2.7246484756469727,
      "eval_runtime": 109.2339,
      "eval_samples_per_second": 91.547,
      "eval_steps_per_second": 5.722,
      "step": 374000
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.805315401857189e-05,
      "loss": 3.0161,
      "step": 374100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8049951969260326e-05,
      "loss": 3.0191,
      "step": 374200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.804674991994877e-05,
      "loss": 3.035,
      "step": 374300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8043547870637205e-05,
      "loss": 3.0115,
      "step": 374400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.804034582132565e-05,
      "loss": 3.0107,
      "step": 374500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.803714377201409e-05,
      "loss": 3.0299,
      "step": 374600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.803394172270253e-05,
      "loss": 3.0128,
      "step": 374700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.803073967339097e-05,
      "loss": 3.0022,
      "step": 374800
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.802753762407941e-05,
      "loss": 2.9928,
      "step": 374900
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.802433557476786e-05,
      "loss": 3.0252,
      "step": 375000
    },
    {
      "epoch": 0.24,
      "eval_loss": 2.7228286266326904,
      "eval_runtime": 125.8815,
      "eval_samples_per_second": 79.44,
      "eval_steps_per_second": 4.965,
      "step": 375000
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.802113352545629e-05,
      "loss": 3.0408,
      "step": 375100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.801793147614474e-05,
      "loss": 3.0227,
      "step": 375200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8014729426833177e-05,
      "loss": 3.0397,
      "step": 375300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8011527377521616e-05,
      "loss": 3.0156,
      "step": 375400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8008325328210056e-05,
      "loss": 3.0068,
      "step": 375500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.8005123278898496e-05,
      "loss": 3.0294,
      "step": 375600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.800192122958694e-05,
      "loss": 3.0092,
      "step": 375700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7998719180275375e-05,
      "loss": 3.0224,
      "step": 375800
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.799551713096382e-05,
      "loss": 3.0046,
      "step": 375900
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7992315081652255e-05,
      "loss": 2.9928,
      "step": 376000
    },
    {
      "epoch": 0.24,
      "eval_loss": 2.722789764404297,
      "eval_runtime": 109.6215,
      "eval_samples_per_second": 91.223,
      "eval_steps_per_second": 5.701,
      "step": 376000
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.79891130323407e-05,
      "loss": 3.015,
      "step": 376100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.798591098302914e-05,
      "loss": 3.0002,
      "step": 376200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.798270893371758e-05,
      "loss": 3.0402,
      "step": 376300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.797950688440603e-05,
      "loss": 3.0154,
      "step": 376400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.797630483509446e-05,
      "loss": 3.0238,
      "step": 376500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.797310278578291e-05,
      "loss": 2.9985,
      "step": 376600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.796990073647134e-05,
      "loss": 3.0278,
      "step": 376700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7966698687159786e-05,
      "loss": 3.0228,
      "step": 376800
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7963496637848226e-05,
      "loss": 3.0154,
      "step": 376900
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7960294588536666e-05,
      "loss": 3.0052,
      "step": 377000
    },
    {
      "epoch": 0.24,
      "eval_loss": 2.722949266433716,
      "eval_runtime": 129.5623,
      "eval_samples_per_second": 77.183,
      "eval_steps_per_second": 4.824,
      "step": 377000
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7957092539225105e-05,
      "loss": 3.0088,
      "step": 377100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7953890489913545e-05,
      "loss": 3.0139,
      "step": 377200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.795068844060199e-05,
      "loss": 3.0132,
      "step": 377300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7947486391290425e-05,
      "loss": 3.0167,
      "step": 377400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.794428434197887e-05,
      "loss": 3.0208,
      "step": 377500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7941082292667304e-05,
      "loss": 3.01,
      "step": 377600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.793788024335575e-05,
      "loss": 3.0186,
      "step": 377700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.793467819404419e-05,
      "loss": 3.0256,
      "step": 377800
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.793147614473263e-05,
      "loss": 3.0003,
      "step": 377900
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.792827409542108e-05,
      "loss": 3.0573,
      "step": 378000
    },
    {
      "epoch": 0.24,
      "eval_loss": 2.7206273078918457,
      "eval_runtime": 110.0716,
      "eval_samples_per_second": 90.85,
      "eval_steps_per_second": 5.678,
      "step": 378000
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.792507204610951e-05,
      "loss": 3.0462,
      "step": 378100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7921869996797956e-05,
      "loss": 3.0574,
      "step": 378200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.791866794748639e-05,
      "loss": 3.0216,
      "step": 378300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7915465898174836e-05,
      "loss": 3.0126,
      "step": 378400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7912263848863275e-05,
      "loss": 2.9959,
      "step": 378500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7909061799551715e-05,
      "loss": 3.0058,
      "step": 378600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7905859750240155e-05,
      "loss": 3.021,
      "step": 378700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7902657700928595e-05,
      "loss": 3.0192,
      "step": 378800
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.789945565161704e-05,
      "loss": 3.0444,
      "step": 378900
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7896253602305474e-05,
      "loss": 3.0173,
      "step": 379000
    },
    {
      "epoch": 0.24,
      "eval_loss": 2.7208235263824463,
      "eval_runtime": 109.9333,
      "eval_samples_per_second": 90.964,
      "eval_steps_per_second": 5.685,
      "step": 379000
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.789305155299392e-05,
      "loss": 3.0238,
      "step": 379100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7889849503682353e-05,
      "loss": 3.018,
      "step": 379200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.78866474543708e-05,
      "loss": 3.024,
      "step": 379300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.788344540505924e-05,
      "loss": 3.0242,
      "step": 379400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.788024335574768e-05,
      "loss": 3.0305,
      "step": 379500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7877041306436126e-05,
      "loss": 3.0182,
      "step": 379600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.787383925712456e-05,
      "loss": 3.0162,
      "step": 379700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7870637207813005e-05,
      "loss": 3.0041,
      "step": 379800
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.786743515850144e-05,
      "loss": 3.0363,
      "step": 379900
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7864233109189885e-05,
      "loss": 2.9865,
      "step": 380000
    },
    {
      "epoch": 0.24,
      "eval_loss": 2.7212319374084473,
      "eval_runtime": 109.4276,
      "eval_samples_per_second": 91.385,
      "eval_steps_per_second": 5.712,
      "step": 380000
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7861031059878325e-05,
      "loss": 2.987,
      "step": 380100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7857829010566764e-05,
      "loss": 3.0052,
      "step": 380200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7854626961255204e-05,
      "loss": 3.0378,
      "step": 380300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7851424911943644e-05,
      "loss": 3.0284,
      "step": 380400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.784822286263209e-05,
      "loss": 3.0169,
      "step": 380500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7845020813320523e-05,
      "loss": 3.019,
      "step": 380600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.784181876400897e-05,
      "loss": 3.0202,
      "step": 380700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.78386167146974e-05,
      "loss": 3.0342,
      "step": 380800
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.783541466538585e-05,
      "loss": 3.0038,
      "step": 380900
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.783221261607429e-05,
      "loss": 3.0073,
      "step": 381000
    },
    {
      "epoch": 0.24,
      "eval_loss": 2.7234158515930176,
      "eval_runtime": 109.4591,
      "eval_samples_per_second": 91.358,
      "eval_steps_per_second": 5.71,
      "step": 381000
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.782901056676273e-05,
      "loss": 3.0378,
      "step": 381100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7825808517451175e-05,
      "loss": 3.0051,
      "step": 381200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.782260646813961e-05,
      "loss": 3.0019,
      "step": 381300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7819404418828055e-05,
      "loss": 3.006,
      "step": 381400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.781620236951649e-05,
      "loss": 3.0046,
      "step": 381500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7813000320204934e-05,
      "loss": 3.0077,
      "step": 381600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7809798270893374e-05,
      "loss": 3.0169,
      "step": 381700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7806596221581814e-05,
      "loss": 3.0172,
      "step": 381800
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7803394172270254e-05,
      "loss": 3.0442,
      "step": 381900
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.780019212295869e-05,
      "loss": 3.0218,
      "step": 382000
    },
    {
      "epoch": 0.24,
      "eval_loss": 2.721240758895874,
      "eval_runtime": 109.3358,
      "eval_samples_per_second": 91.461,
      "eval_steps_per_second": 5.716,
      "step": 382000
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.779699007364714e-05,
      "loss": 3.0266,
      "step": 382100
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.779378802433557e-05,
      "loss": 3.0184,
      "step": 382200
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.779058597502402e-05,
      "loss": 2.9812,
      "step": 382300
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.778738392571245e-05,
      "loss": 3.0022,
      "step": 382400
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.77841818764009e-05,
      "loss": 3.0372,
      "step": 382500
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.778097982708934e-05,
      "loss": 3.0107,
      "step": 382600
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.777777777777778e-05,
      "loss": 3.0364,
      "step": 382700
    },
    {
      "epoch": 0.24,
      "learning_rate": 3.7774575728466225e-05,
      "loss": 2.9863,
      "step": 382800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.777137367915466e-05,
      "loss": 3.0204,
      "step": 382900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7768171629843104e-05,
      "loss": 3.0094,
      "step": 383000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.723273277282715,
      "eval_runtime": 109.4718,
      "eval_samples_per_second": 91.348,
      "eval_steps_per_second": 5.709,
      "step": 383000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.776496958053154e-05,
      "loss": 3.0302,
      "step": 383100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7761767531219984e-05,
      "loss": 2.9997,
      "step": 383200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7758565481908423e-05,
      "loss": 3.0206,
      "step": 383300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.775536343259686e-05,
      "loss": 3.0239,
      "step": 383400
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.77521613832853e-05,
      "loss": 3.0387,
      "step": 383500
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.774895933397374e-05,
      "loss": 3.0124,
      "step": 383600
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.774575728466219e-05,
      "loss": 3.0034,
      "step": 383700
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.774255523535062e-05,
      "loss": 3.0349,
      "step": 383800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.773935318603907e-05,
      "loss": 3.0468,
      "step": 383900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.773615113672751e-05,
      "loss": 3.0254,
      "step": 384000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.7192537784576416,
      "eval_runtime": 110.3209,
      "eval_samples_per_second": 90.645,
      "eval_steps_per_second": 5.665,
      "step": 384000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.773294908741595e-05,
      "loss": 2.9988,
      "step": 384100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.772974703810439e-05,
      "loss": 3.0121,
      "step": 384200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.772654498879283e-05,
      "loss": 3.031,
      "step": 384300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7723342939481274e-05,
      "loss": 3.0395,
      "step": 384400
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.772014089016971e-05,
      "loss": 3.0164,
      "step": 384500
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7716938840858154e-05,
      "loss": 3.0059,
      "step": 384600
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7713736791546587e-05,
      "loss": 3.0328,
      "step": 384700
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.771053474223503e-05,
      "loss": 3.0342,
      "step": 384800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.770733269292347e-05,
      "loss": 2.9844,
      "step": 384900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.770413064361191e-05,
      "loss": 3.0041,
      "step": 385000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.7241554260253906,
      "eval_runtime": 109.7098,
      "eval_samples_per_second": 91.15,
      "eval_steps_per_second": 5.697,
      "step": 385000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.770092859430035e-05,
      "loss": 3.0164,
      "step": 385100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.769772654498879e-05,
      "loss": 3.0006,
      "step": 385200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.769452449567724e-05,
      "loss": 3.0264,
      "step": 385300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.769132244636567e-05,
      "loss": 2.9883,
      "step": 385400
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.768812039705412e-05,
      "loss": 2.9988,
      "step": 385500
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.768491834774256e-05,
      "loss": 3.036,
      "step": 385600
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7681716298431e-05,
      "loss": 3.0057,
      "step": 385700
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.767851424911944e-05,
      "loss": 2.9977,
      "step": 385800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.767531219980788e-05,
      "loss": 3.0135,
      "step": 385900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7672110150496323e-05,
      "loss": 3.0096,
      "step": 386000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.718982696533203,
      "eval_runtime": 109.7748,
      "eval_samples_per_second": 91.096,
      "eval_steps_per_second": 5.693,
      "step": 386000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7668908101184756e-05,
      "loss": 3.0068,
      "step": 386100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.76657060518732e-05,
      "loss": 2.9666,
      "step": 386200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.766250400256164e-05,
      "loss": 3.0266,
      "step": 386300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.765930195325008e-05,
      "loss": 3.0005,
      "step": 386400
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.765609990393852e-05,
      "loss": 3.0123,
      "step": 386500
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.765289785462696e-05,
      "loss": 3.0098,
      "step": 386600
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.76496958053154e-05,
      "loss": 3.018,
      "step": 386700
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.764649375600384e-05,
      "loss": 3.0498,
      "step": 386800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.764329170669229e-05,
      "loss": 3.0327,
      "step": 386900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.764008965738073e-05,
      "loss": 3.0255,
      "step": 387000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.7171742916107178,
      "eval_runtime": 109.5579,
      "eval_samples_per_second": 91.276,
      "eval_steps_per_second": 5.705,
      "step": 387000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.763688760806917e-05,
      "loss": 3.0185,
      "step": 387100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.763368555875761e-05,
      "loss": 3.0339,
      "step": 387200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.763048350944605e-05,
      "loss": 2.9961,
      "step": 387300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7627281460134487e-05,
      "loss": 3.0089,
      "step": 387400
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7624079410822926e-05,
      "loss": 3.0012,
      "step": 387500
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.762087736151137e-05,
      "loss": 3.0181,
      "step": 387600
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7617675312199806e-05,
      "loss": 3.0232,
      "step": 387700
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.761447326288825e-05,
      "loss": 3.0129,
      "step": 387800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.761127121357669e-05,
      "loss": 3.0308,
      "step": 387900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.760806916426513e-05,
      "loss": 3.0024,
      "step": 388000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.719799518585205,
      "eval_runtime": 109.6445,
      "eval_samples_per_second": 91.204,
      "eval_steps_per_second": 5.7,
      "step": 388000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.760486711495357e-05,
      "loss": 2.9923,
      "step": 388100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.760166506564201e-05,
      "loss": 3.0138,
      "step": 388200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.759846301633045e-05,
      "loss": 2.9949,
      "step": 388300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.759526096701889e-05,
      "loss": 3.0147,
      "step": 388400
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.759205891770734e-05,
      "loss": 3.0138,
      "step": 388500
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.758885686839578e-05,
      "loss": 3.0098,
      "step": 388600
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.758565481908422e-05,
      "loss": 3.0081,
      "step": 388700
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7582452769772657e-05,
      "loss": 2.9822,
      "step": 388800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7579250720461096e-05,
      "loss": 3.0217,
      "step": 388900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7576048671149536e-05,
      "loss": 3.0302,
      "step": 389000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.7194199562072754,
      "eval_runtime": 109.1836,
      "eval_samples_per_second": 91.589,
      "eval_steps_per_second": 5.724,
      "step": 389000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7572846621837976e-05,
      "loss": 3.0134,
      "step": 389100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.756964457252642e-05,
      "loss": 3.0031,
      "step": 389200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.756644252321486e-05,
      "loss": 3.0173,
      "step": 389300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.75632404739033e-05,
      "loss": 2.9957,
      "step": 389400
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.756003842459174e-05,
      "loss": 3.018,
      "step": 389500
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.755683637528018e-05,
      "loss": 3.0247,
      "step": 389600
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.755363432596862e-05,
      "loss": 3.0226,
      "step": 389700
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.755043227665706e-05,
      "loss": 3.014,
      "step": 389800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.75472302273455e-05,
      "loss": 3.0267,
      "step": 389900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.754402817803394e-05,
      "loss": 3.0099,
      "step": 390000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.7192628383636475,
      "eval_runtime": 125.8644,
      "eval_samples_per_second": 79.451,
      "eval_steps_per_second": 4.966,
      "step": 390000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.754082612872239e-05,
      "loss": 3.0212,
      "step": 390100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7537624079410826e-05,
      "loss": 3.0106,
      "step": 390200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7534422030099266e-05,
      "loss": 3.0111,
      "step": 390300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7531219980787706e-05,
      "loss": 3.0138,
      "step": 390400
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7528017931476146e-05,
      "loss": 3.0096,
      "step": 390500
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7524815882164585e-05,
      "loss": 3.0166,
      "step": 390600
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7521613832853025e-05,
      "loss": 3.0099,
      "step": 390700
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.751841178354147e-05,
      "loss": 3.0033,
      "step": 390800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.751520973422991e-05,
      "loss": 3.0262,
      "step": 390900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.751200768491835e-05,
      "loss": 3.0559,
      "step": 391000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.7186601161956787,
      "eval_runtime": 109.0179,
      "eval_samples_per_second": 91.728,
      "eval_steps_per_second": 5.733,
      "step": 391000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.750880563560679e-05,
      "loss": 3.0312,
      "step": 391100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.750560358629523e-05,
      "loss": 2.9962,
      "step": 391200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.750240153698367e-05,
      "loss": 3.0181,
      "step": 391300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.749919948767211e-05,
      "loss": 3.0147,
      "step": 391400
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.749599743836055e-05,
      "loss": 3.0007,
      "step": 391500
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7492795389048996e-05,
      "loss": 3.0132,
      "step": 391600
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7489593339737436e-05,
      "loss": 3.0388,
      "step": 391700
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7486391290425876e-05,
      "loss": 3.0053,
      "step": 391800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7483189241114316e-05,
      "loss": 3.0331,
      "step": 391900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7479987191802755e-05,
      "loss": 3.0152,
      "step": 392000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.717024564743042,
      "eval_runtime": 126.8741,
      "eval_samples_per_second": 78.818,
      "eval_steps_per_second": 4.926,
      "step": 392000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7476785142491195e-05,
      "loss": 2.9591,
      "step": 392100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7473583093179635e-05,
      "loss": 3.0084,
      "step": 392200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7470381043868074e-05,
      "loss": 3.0359,
      "step": 392300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.746717899455652e-05,
      "loss": 2.9952,
      "step": 392400
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.746397694524496e-05,
      "loss": 2.9893,
      "step": 392500
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.74607748959334e-05,
      "loss": 2.9973,
      "step": 392600
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.745757284662184e-05,
      "loss": 3.0173,
      "step": 392700
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.745437079731028e-05,
      "loss": 3.0406,
      "step": 392800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.745116874799872e-05,
      "loss": 3.0283,
      "step": 392900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.744796669868716e-05,
      "loss": 3.0195,
      "step": 393000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.7208597660064697,
      "eval_runtime": 109.8665,
      "eval_samples_per_second": 91.02,
      "eval_steps_per_second": 5.689,
      "step": 393000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.74447646493756e-05,
      "loss": 3.0011,
      "step": 393100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7441562600064046e-05,
      "loss": 3.0009,
      "step": 393200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7438360550752485e-05,
      "loss": 3.0142,
      "step": 393300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7435158501440925e-05,
      "loss": 3.0217,
      "step": 393400
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7431956452129365e-05,
      "loss": 3.0074,
      "step": 393500
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7428754402817805e-05,
      "loss": 3.0096,
      "step": 393600
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7425552353506244e-05,
      "loss": 3.0275,
      "step": 393700
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7422350304194684e-05,
      "loss": 3.0168,
      "step": 393800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.741914825488313e-05,
      "loss": 3.0119,
      "step": 393900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.741594620557157e-05,
      "loss": 3.0082,
      "step": 394000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.7191293239593506,
      "eval_runtime": 109.9634,
      "eval_samples_per_second": 90.939,
      "eval_steps_per_second": 5.684,
      "step": 394000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.741274415626001e-05,
      "loss": 3.0083,
      "step": 394100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.740954210694845e-05,
      "loss": 2.9945,
      "step": 394200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.740634005763689e-05,
      "loss": 3.0263,
      "step": 394300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.740313800832533e-05,
      "loss": 3.0269,
      "step": 394400
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.739993595901377e-05,
      "loss": 3.041,
      "step": 394500
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.739673390970221e-05,
      "loss": 3.0018,
      "step": 394600
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.739353186039065e-05,
      "loss": 3.0274,
      "step": 394700
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7390329811079095e-05,
      "loss": 3.0116,
      "step": 394800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7387127761767535e-05,
      "loss": 3.038,
      "step": 394900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7383925712455975e-05,
      "loss": 2.9932,
      "step": 395000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.717728853225708,
      "eval_runtime": 111.0643,
      "eval_samples_per_second": 90.038,
      "eval_steps_per_second": 5.627,
      "step": 395000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7380723663144414e-05,
      "loss": 3.026,
      "step": 395100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7377521613832854e-05,
      "loss": 3.0131,
      "step": 395200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7374319564521294e-05,
      "loss": 3.0089,
      "step": 395300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7371117515209733e-05,
      "loss": 2.9925,
      "step": 395400
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.736791546589818e-05,
      "loss": 3.0509,
      "step": 395500
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.736471341658662e-05,
      "loss": 3.0147,
      "step": 395600
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.736151136727506e-05,
      "loss": 3.0315,
      "step": 395700
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.73583093179635e-05,
      "loss": 3.0152,
      "step": 395800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.735510726865194e-05,
      "loss": 3.0405,
      "step": 395900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.735190521934038e-05,
      "loss": 3.0073,
      "step": 396000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.718879222869873,
      "eval_runtime": 109.7429,
      "eval_samples_per_second": 91.122,
      "eval_steps_per_second": 5.695,
      "step": 396000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.734870317002882e-05,
      "loss": 3.0422,
      "step": 396100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7345501120717265e-05,
      "loss": 3.0169,
      "step": 396200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.73422990714057e-05,
      "loss": 3.0032,
      "step": 396300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7339097022094144e-05,
      "loss": 3.0202,
      "step": 396400
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7335894972782584e-05,
      "loss": 3.0189,
      "step": 396500
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7332692923471024e-05,
      "loss": 3.0223,
      "step": 396600
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7329490874159464e-05,
      "loss": 3.001,
      "step": 396700
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.73262888248479e-05,
      "loss": 2.9789,
      "step": 396800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.732308677553635e-05,
      "loss": 3.0401,
      "step": 396900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.731988472622478e-05,
      "loss": 3.0387,
      "step": 397000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.7175886631011963,
      "eval_runtime": 109.2198,
      "eval_samples_per_second": 91.559,
      "eval_steps_per_second": 5.722,
      "step": 397000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.731668267691323e-05,
      "loss": 3.006,
      "step": 397100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.731348062760167e-05,
      "loss": 2.992,
      "step": 397200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.731027857829011e-05,
      "loss": 3.0068,
      "step": 397300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.730707652897855e-05,
      "loss": 3.0087,
      "step": 397400
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.730387447966699e-05,
      "loss": 2.999,
      "step": 397500
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.730067243035543e-05,
      "loss": 3.0123,
      "step": 397600
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.729747038104387e-05,
      "loss": 3.0284,
      "step": 397700
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7294268331732314e-05,
      "loss": 2.9917,
      "step": 397800
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.729106628242075e-05,
      "loss": 2.9626,
      "step": 397900
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7287864233109194e-05,
      "loss": 3.0191,
      "step": 398000
    },
    {
      "epoch": 0.25,
      "eval_loss": 2.7173280715942383,
      "eval_runtime": 109.44,
      "eval_samples_per_second": 91.374,
      "eval_steps_per_second": 5.711,
      "step": 398000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7284662183797634e-05,
      "loss": 3.0266,
      "step": 398100
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.728146013448607e-05,
      "loss": 3.0484,
      "step": 398200
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.727825808517451e-05,
      "loss": 2.9944,
      "step": 398300
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.727505603586295e-05,
      "loss": 3.0082,
      "step": 398400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.72718539865514e-05,
      "loss": 3.0034,
      "step": 398500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.726865193723983e-05,
      "loss": 3.0088,
      "step": 398600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.726544988792828e-05,
      "loss": 2.9997,
      "step": 398700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.726224783861672e-05,
      "loss": 2.9981,
      "step": 398800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.725904578930516e-05,
      "loss": 2.9949,
      "step": 398900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.72558437399936e-05,
      "loss": 2.9974,
      "step": 399000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.7185330390930176,
      "eval_runtime": 126.0289,
      "eval_samples_per_second": 79.347,
      "eval_steps_per_second": 4.959,
      "step": 399000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.725264169068204e-05,
      "loss": 2.9998,
      "step": 399100
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7249439641370484e-05,
      "loss": 3.0031,
      "step": 399200
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.724623759205892e-05,
      "loss": 3.0291,
      "step": 399300
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7243035542747364e-05,
      "loss": 3.0096,
      "step": 399400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.72398334934358e-05,
      "loss": 3.0047,
      "step": 399500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.723663144412424e-05,
      "loss": 2.9782,
      "step": 399600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.723342939481268e-05,
      "loss": 3.0401,
      "step": 399700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.723022734550112e-05,
      "loss": 3.0085,
      "step": 399800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.722702529618956e-05,
      "loss": 3.0134,
      "step": 399900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7223823246878e-05,
      "loss": 3.0039,
      "step": 400000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.7189953327178955,
      "eval_runtime": 109.0195,
      "eval_samples_per_second": 91.727,
      "eval_steps_per_second": 5.733,
      "step": 400000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.722062119756645e-05,
      "loss": 3.0158,
      "step": 400100
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.721741914825488e-05,
      "loss": 3.004,
      "step": 400200
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.721421709894333e-05,
      "loss": 2.9969,
      "step": 400300
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.721101504963176e-05,
      "loss": 3.0045,
      "step": 400400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.720781300032021e-05,
      "loss": 3.0319,
      "step": 400500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.720461095100865e-05,
      "loss": 3.0106,
      "step": 400600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.720140890169709e-05,
      "loss": 3.0193,
      "step": 400700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7198206852385534e-05,
      "loss": 3.0214,
      "step": 400800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7195004803073967e-05,
      "loss": 3.0592,
      "step": 400900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.719180275376241e-05,
      "loss": 3.0298,
      "step": 401000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.715094566345215,
      "eval_runtime": 109.06,
      "eval_samples_per_second": 91.693,
      "eval_steps_per_second": 5.731,
      "step": 401000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7188600704450846e-05,
      "loss": 3.0084,
      "step": 401100
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.718539865513929e-05,
      "loss": 3.0206,
      "step": 401200
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.718219660582773e-05,
      "loss": 3.0096,
      "step": 401300
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.717899455651617e-05,
      "loss": 3.0165,
      "step": 401400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.717579250720462e-05,
      "loss": 2.9969,
      "step": 401500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.717259045789305e-05,
      "loss": 3.0162,
      "step": 401600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.71693884085815e-05,
      "loss": 3.0028,
      "step": 401700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.716618635926993e-05,
      "loss": 2.9934,
      "step": 401800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.716298430995838e-05,
      "loss": 3.0063,
      "step": 401900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.715978226064681e-05,
      "loss": 3.0324,
      "step": 402000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.7157955169677734,
      "eval_runtime": 109.1182,
      "eval_samples_per_second": 91.644,
      "eval_steps_per_second": 5.728,
      "step": 402000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.715658021133526e-05,
      "loss": 3.0006,
      "step": 402100
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.71533781620237e-05,
      "loss": 3.028,
      "step": 402200
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7150176112712136e-05,
      "loss": 3.0094,
      "step": 402300
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.714697406340058e-05,
      "loss": 3.0079,
      "step": 402400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7143772014089016e-05,
      "loss": 3.0208,
      "step": 402500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.714056996477746e-05,
      "loss": 3.0378,
      "step": 402600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7137367915465895e-05,
      "loss": 3.0251,
      "step": 402700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.713416586615434e-05,
      "loss": 3.0094,
      "step": 402800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.713096381684278e-05,
      "loss": 3.0347,
      "step": 402900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.712776176753122e-05,
      "loss": 2.9641,
      "step": 403000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.716461420059204,
      "eval_runtime": 110.4615,
      "eval_samples_per_second": 90.529,
      "eval_steps_per_second": 5.658,
      "step": 403000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.712455971821967e-05,
      "loss": 3.0023,
      "step": 403100
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.71213576689081e-05,
      "loss": 3.0326,
      "step": 403200
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.711815561959655e-05,
      "loss": 3.0179,
      "step": 403300
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.711495357028498e-05,
      "loss": 3.0286,
      "step": 403400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.711175152097343e-05,
      "loss": 2.9967,
      "step": 403500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.710854947166186e-05,
      "loss": 2.9817,
      "step": 403600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7105347422350306e-05,
      "loss": 3.0232,
      "step": 403700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7102145373038746e-05,
      "loss": 3.0118,
      "step": 403800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7098943323727186e-05,
      "loss": 3.0284,
      "step": 403900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.709574127441563e-05,
      "loss": 3.0219,
      "step": 404000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.713331460952759,
      "eval_runtime": 110.3084,
      "eval_samples_per_second": 90.655,
      "eval_steps_per_second": 5.666,
      "step": 404000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7092539225104065e-05,
      "loss": 2.9963,
      "step": 404100
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.708933717579251e-05,
      "loss": 2.9859,
      "step": 404200
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7086135126480945e-05,
      "loss": 3.0173,
      "step": 404300
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.708293307716939e-05,
      "loss": 2.9977,
      "step": 404400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.707973102785783e-05,
      "loss": 3.0385,
      "step": 404500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.707652897854627e-05,
      "loss": 2.9862,
      "step": 404600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.707332692923472e-05,
      "loss": 2.9793,
      "step": 404700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.707012487992315e-05,
      "loss": 3.0223,
      "step": 404800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.70669228306116e-05,
      "loss": 3.011,
      "step": 404900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.706372078130003e-05,
      "loss": 3.0026,
      "step": 405000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.7150590419769287,
      "eval_runtime": 109.2166,
      "eval_samples_per_second": 91.561,
      "eval_steps_per_second": 5.723,
      "step": 405000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7060518731988476e-05,
      "loss": 3.0148,
      "step": 405100
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.705731668267691e-05,
      "loss": 3.0084,
      "step": 405200
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7054114633365356e-05,
      "loss": 3.0065,
      "step": 405300
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7050912584053795e-05,
      "loss": 2.9932,
      "step": 405400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7047710534742235e-05,
      "loss": 3.0105,
      "step": 405500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.704450848543068e-05,
      "loss": 2.9826,
      "step": 405600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7041306436119115e-05,
      "loss": 2.9892,
      "step": 405700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.703810438680756e-05,
      "loss": 2.9945,
      "step": 405800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7034902337495994e-05,
      "loss": 3.0239,
      "step": 405900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.703170028818444e-05,
      "loss": 2.9897,
      "step": 406000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.7127273082733154,
      "eval_runtime": 108.9491,
      "eval_samples_per_second": 91.786,
      "eval_steps_per_second": 5.737,
      "step": 406000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.702849823887288e-05,
      "loss": 3.0091,
      "step": 406100
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.702529618956132e-05,
      "loss": 2.9914,
      "step": 406200
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.702209414024977e-05,
      "loss": 2.9791,
      "step": 406300
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.70188920909382e-05,
      "loss": 3.0222,
      "step": 406400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7015690041626646e-05,
      "loss": 2.9958,
      "step": 406500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.701248799231508e-05,
      "loss": 2.9873,
      "step": 406600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7009285943003526e-05,
      "loss": 2.9918,
      "step": 406700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7006083893691965e-05,
      "loss": 3.024,
      "step": 406800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.7002881844380405e-05,
      "loss": 3.0016,
      "step": 406900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6999679795068845e-05,
      "loss": 3.0121,
      "step": 407000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.711146831512451,
      "eval_runtime": 108.8356,
      "eval_samples_per_second": 91.882,
      "eval_steps_per_second": 5.743,
      "step": 407000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6996477745757285e-05,
      "loss": 3.0006,
      "step": 407100
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.699327569644573e-05,
      "loss": 3.0046,
      "step": 407200
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6990073647134164e-05,
      "loss": 3.0321,
      "step": 407300
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.698687159782261e-05,
      "loss": 2.9998,
      "step": 407400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6983669548511043e-05,
      "loss": 3.0182,
      "step": 407500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.698046749919949e-05,
      "loss": 3.0193,
      "step": 407600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.697726544988793e-05,
      "loss": 3.0267,
      "step": 407700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.697406340057637e-05,
      "loss": 3.0042,
      "step": 407800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6970861351264816e-05,
      "loss": 3.0045,
      "step": 407900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.696765930195325e-05,
      "loss": 3.0384,
      "step": 408000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.7107160091400146,
      "eval_runtime": 126.8751,
      "eval_samples_per_second": 78.818,
      "eval_steps_per_second": 4.926,
      "step": 408000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6964457252641695e-05,
      "loss": 2.9877,
      "step": 408100
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.696125520333013e-05,
      "loss": 3.0276,
      "step": 408200
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6958053154018575e-05,
      "loss": 3.0126,
      "step": 408300
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6954851104707015e-05,
      "loss": 3.0261,
      "step": 408400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6951649055395454e-05,
      "loss": 3.0062,
      "step": 408500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6948447006083894e-05,
      "loss": 3.0014,
      "step": 408600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6945244956772334e-05,
      "loss": 3.0095,
      "step": 408700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.694204290746078e-05,
      "loss": 3.0137,
      "step": 408800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.693884085814921e-05,
      "loss": 3.0031,
      "step": 408900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.693563880883766e-05,
      "loss": 2.9845,
      "step": 409000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.71155047416687,
      "eval_runtime": 109.2966,
      "eval_samples_per_second": 91.494,
      "eval_steps_per_second": 5.718,
      "step": 409000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.69324367595261e-05,
      "loss": 3.0076,
      "step": 409100
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.692923471021454e-05,
      "loss": 3.0209,
      "step": 409200
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.692603266090298e-05,
      "loss": 2.99,
      "step": 409300
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.692283061159142e-05,
      "loss": 3.033,
      "step": 409400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6919628562279865e-05,
      "loss": 3.0176,
      "step": 409500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.69164265129683e-05,
      "loss": 3.0179,
      "step": 409600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6913224463656745e-05,
      "loss": 3.0082,
      "step": 409700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.691002241434518e-05,
      "loss": 3.0105,
      "step": 409800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6906820365033624e-05,
      "loss": 3.0144,
      "step": 409900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6903618315722064e-05,
      "loss": 3.0111,
      "step": 410000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.713651180267334,
      "eval_runtime": 109.3368,
      "eval_samples_per_second": 91.461,
      "eval_steps_per_second": 5.716,
      "step": 410000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6900416266410504e-05,
      "loss": 2.9879,
      "step": 410100
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6897214217098944e-05,
      "loss": 2.9868,
      "step": 410200
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.689401216778738e-05,
      "loss": 2.9959,
      "step": 410300
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.689081011847583e-05,
      "loss": 2.9845,
      "step": 410400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.688760806916426e-05,
      "loss": 3.0186,
      "step": 410500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.688440601985271e-05,
      "loss": 2.9918,
      "step": 410600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.688120397054115e-05,
      "loss": 2.9987,
      "step": 410700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.687800192122959e-05,
      "loss": 3.0222,
      "step": 410800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.687479987191803e-05,
      "loss": 3.0178,
      "step": 410900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.687159782260647e-05,
      "loss": 3.0174,
      "step": 411000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.711188316345215,
      "eval_runtime": 109.9619,
      "eval_samples_per_second": 90.941,
      "eval_steps_per_second": 5.684,
      "step": 411000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6868395773294915e-05,
      "loss": 3.0119,
      "step": 411100
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.686519372398335e-05,
      "loss": 3.0121,
      "step": 411200
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6861991674671794e-05,
      "loss": 2.9889,
      "step": 411300
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6858789625360234e-05,
      "loss": 2.9779,
      "step": 411400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6855587576048674e-05,
      "loss": 2.9995,
      "step": 411500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6852385526737113e-05,
      "loss": 2.9973,
      "step": 411600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.684918347742555e-05,
      "loss": 2.9961,
      "step": 411700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.684598142811399e-05,
      "loss": 2.9816,
      "step": 411800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.684277937880243e-05,
      "loss": 3.0081,
      "step": 411900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.683957732949088e-05,
      "loss": 3.006,
      "step": 412000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.712782144546509,
      "eval_runtime": 109.7201,
      "eval_samples_per_second": 91.141,
      "eval_steps_per_second": 5.696,
      "step": 412000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.683637528017931e-05,
      "loss": 3.0114,
      "step": 412100
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.683317323086776e-05,
      "loss": 3.0141,
      "step": 412200
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.68299711815562e-05,
      "loss": 3.0007,
      "step": 412300
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.682676913224464e-05,
      "loss": 2.9662,
      "step": 412400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.682356708293308e-05,
      "loss": 3.0024,
      "step": 412500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.682036503362152e-05,
      "loss": 2.9821,
      "step": 412600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6817162984309964e-05,
      "loss": 3.0098,
      "step": 412700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.68139609349984e-05,
      "loss": 2.9864,
      "step": 412800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.6810758885686844e-05,
      "loss": 2.9768,
      "step": 412900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.680755683637528e-05,
      "loss": 3.0191,
      "step": 413000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.7137563228607178,
      "eval_runtime": 109.2454,
      "eval_samples_per_second": 91.537,
      "eval_steps_per_second": 5.721,
      "step": 413000
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.680435478706372e-05,
      "loss": 3.0227,
      "step": 413100
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.680115273775216e-05,
      "loss": 3.0158,
      "step": 413200
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.67979506884406e-05,
      "loss": 2.9797,
      "step": 413300
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.679474863912904e-05,
      "loss": 3.0072,
      "step": 413400
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.679154658981748e-05,
      "loss": 2.9702,
      "step": 413500
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.678834454050593e-05,
      "loss": 2.9878,
      "step": 413600
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.678514249119437e-05,
      "loss": 2.9917,
      "step": 413700
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.678194044188281e-05,
      "loss": 2.9872,
      "step": 413800
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.677873839257125e-05,
      "loss": 3.0001,
      "step": 413900
    },
    {
      "epoch": 0.26,
      "learning_rate": 3.677553634325969e-05,
      "loss": 3.0053,
      "step": 414000
    },
    {
      "epoch": 0.26,
      "eval_loss": 2.711573839187622,
      "eval_runtime": 108.955,
      "eval_samples_per_second": 91.781,
      "eval_steps_per_second": 5.736,
      "step": 414000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.677233429394813e-05,
      "loss": 2.9937,
      "step": 414100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.676913224463657e-05,
      "loss": 2.9972,
      "step": 414200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6765930195325013e-05,
      "loss": 3.0211,
      "step": 414300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6762728146013446e-05,
      "loss": 2.986,
      "step": 414400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.675952609670189e-05,
      "loss": 3.007,
      "step": 414500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.675632404739033e-05,
      "loss": 2.9873,
      "step": 414600
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.675312199807877e-05,
      "loss": 3.0239,
      "step": 414700
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.674991994876721e-05,
      "loss": 3.0184,
      "step": 414800
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.674671789945565e-05,
      "loss": 3.0017,
      "step": 414900
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.674351585014409e-05,
      "loss": 3.0186,
      "step": 415000
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.711326837539673,
      "eval_runtime": 125.2381,
      "eval_samples_per_second": 79.848,
      "eval_steps_per_second": 4.99,
      "step": 415000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.674031380083253e-05,
      "loss": 2.9854,
      "step": 415100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.673711175152098e-05,
      "loss": 3.0198,
      "step": 415200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.673390970220942e-05,
      "loss": 2.9978,
      "step": 415300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.673070765289786e-05,
      "loss": 3.0034,
      "step": 415400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.67275056035863e-05,
      "loss": 2.9967,
      "step": 415500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.672430355427474e-05,
      "loss": 3.0268,
      "step": 415600
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6721101504963177e-05,
      "loss": 3.0418,
      "step": 415700
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6717899455651616e-05,
      "loss": 2.9961,
      "step": 415800
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.671469740634006e-05,
      "loss": 3.0138,
      "step": 415900
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.67114953570285e-05,
      "loss": 3.0152,
      "step": 416000
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.71116304397583,
      "eval_runtime": 109.0141,
      "eval_samples_per_second": 91.731,
      "eval_steps_per_second": 5.733,
      "step": 416000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.670829330771694e-05,
      "loss": 2.9581,
      "step": 416100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.670509125840538e-05,
      "loss": 2.9988,
      "step": 416200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.670188920909382e-05,
      "loss": 3.0001,
      "step": 416300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.669868715978226e-05,
      "loss": 3.004,
      "step": 416400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.66954851104707e-05,
      "loss": 2.9883,
      "step": 416500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.669228306115914e-05,
      "loss": 3.0028,
      "step": 416600
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.668908101184759e-05,
      "loss": 2.9877,
      "step": 416700
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.668587896253603e-05,
      "loss": 2.9801,
      "step": 416800
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.668267691322447e-05,
      "loss": 2.9836,
      "step": 416900
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.667947486391291e-05,
      "loss": 2.9971,
      "step": 417000
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.7132811546325684,
      "eval_runtime": 109.0805,
      "eval_samples_per_second": 91.675,
      "eval_steps_per_second": 5.73,
      "step": 417000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6676272814601346e-05,
      "loss": 3.0076,
      "step": 417100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6673070765289786e-05,
      "loss": 3.002,
      "step": 417200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6669868715978226e-05,
      "loss": 2.9747,
      "step": 417300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6666666666666666e-05,
      "loss": 2.9869,
      "step": 417400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.666346461735511e-05,
      "loss": 3.0089,
      "step": 417500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.666026256804355e-05,
      "loss": 2.9895,
      "step": 417600
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.665706051873199e-05,
      "loss": 3.003,
      "step": 417700
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.665385846942043e-05,
      "loss": 3.0035,
      "step": 417800
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.665065642010887e-05,
      "loss": 2.997,
      "step": 417900
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.664745437079731e-05,
      "loss": 2.9801,
      "step": 418000
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.7135965824127197,
      "eval_runtime": 109.8066,
      "eval_samples_per_second": 91.069,
      "eval_steps_per_second": 5.692,
      "step": 418000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.664425232148575e-05,
      "loss": 2.9792,
      "step": 418100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.664105027217419e-05,
      "loss": 3.0061,
      "step": 418200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.663784822286264e-05,
      "loss": 2.9677,
      "step": 418300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.663464617355108e-05,
      "loss": 3.0007,
      "step": 418400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6631444124239516e-05,
      "loss": 2.996,
      "step": 418500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6628242074927956e-05,
      "loss": 2.9972,
      "step": 418600
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6625040025616396e-05,
      "loss": 2.9911,
      "step": 418700
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6621837976304836e-05,
      "loss": 2.9919,
      "step": 418800
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6618635926993275e-05,
      "loss": 2.9945,
      "step": 418900
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.661543387768172e-05,
      "loss": 2.9738,
      "step": 419000
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.715482711791992,
      "eval_runtime": 110.3264,
      "eval_samples_per_second": 90.64,
      "eval_steps_per_second": 5.665,
      "step": 419000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6612231828370155e-05,
      "loss": 2.9898,
      "step": 419100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.66090297790586e-05,
      "loss": 3.0075,
      "step": 419200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.660582772974704e-05,
      "loss": 2.9759,
      "step": 419300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.660262568043548e-05,
      "loss": 2.9513,
      "step": 419400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.659942363112392e-05,
      "loss": 2.9862,
      "step": 419500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.659622158181236e-05,
      "loss": 2.9773,
      "step": 419600
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.65930195325008e-05,
      "loss": 2.9896,
      "step": 419700
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.658981748318924e-05,
      "loss": 2.9879,
      "step": 419800
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6586615433877686e-05,
      "loss": 3.002,
      "step": 419900
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6583413384566126e-05,
      "loss": 2.9855,
      "step": 420000
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.711500406265259,
      "eval_runtime": 109.68,
      "eval_samples_per_second": 91.174,
      "eval_steps_per_second": 5.698,
      "step": 420000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6580211335254566e-05,
      "loss": 2.983,
      "step": 420100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6577009285943006e-05,
      "loss": 2.9868,
      "step": 420200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6573807236631445e-05,
      "loss": 2.9867,
      "step": 420300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6570605187319885e-05,
      "loss": 2.9914,
      "step": 420400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6567403138008325e-05,
      "loss": 2.9767,
      "step": 420500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.656420108869677e-05,
      "loss": 2.9645,
      "step": 420600
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6560999039385204e-05,
      "loss": 2.9982,
      "step": 420700
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.655779699007365e-05,
      "loss": 3.001,
      "step": 420800
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.655459494076209e-05,
      "loss": 3.0057,
      "step": 420900
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.655139289145053e-05,
      "loss": 2.9608,
      "step": 421000
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.7104151248931885,
      "eval_runtime": 109.2848,
      "eval_samples_per_second": 91.504,
      "eval_steps_per_second": 5.719,
      "step": 421000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.654819084213897e-05,
      "loss": 3.0184,
      "step": 421100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.654498879282741e-05,
      "loss": 2.9828,
      "step": 421200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6541786743515856e-05,
      "loss": 3.0183,
      "step": 421300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.653858469420429e-05,
      "loss": 2.9995,
      "step": 421400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6535382644892736e-05,
      "loss": 2.9869,
      "step": 421500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6532180595581175e-05,
      "loss": 3.0101,
      "step": 421600
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6528978546269615e-05,
      "loss": 2.9919,
      "step": 421700
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6525776496958055e-05,
      "loss": 3.0,
      "step": 421800
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6522574447646495e-05,
      "loss": 2.9849,
      "step": 421900
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6519372398334934e-05,
      "loss": 2.9927,
      "step": 422000
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.7093544006347656,
      "eval_runtime": 126.6401,
      "eval_samples_per_second": 78.964,
      "eval_steps_per_second": 4.935,
      "step": 422000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6516170349023374e-05,
      "loss": 2.9565,
      "step": 422100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.651296829971182e-05,
      "loss": 2.9997,
      "step": 422200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6509766250400254e-05,
      "loss": 3.0117,
      "step": 422300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.65065642010887e-05,
      "loss": 3.0037,
      "step": 422400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.650336215177714e-05,
      "loss": 3.0026,
      "step": 422500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.650016010246558e-05,
      "loss": 3.0194,
      "step": 422600
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.649695805315402e-05,
      "loss": 2.9793,
      "step": 422700
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.649375600384246e-05,
      "loss": 2.9995,
      "step": 422800
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6490553954530906e-05,
      "loss": 3.0025,
      "step": 422900
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.648735190521934e-05,
      "loss": 2.9769,
      "step": 423000
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.7118382453918457,
      "eval_runtime": 109.1329,
      "eval_samples_per_second": 91.631,
      "eval_steps_per_second": 5.727,
      "step": 423000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6484149855907785e-05,
      "loss": 2.9996,
      "step": 423100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6480947806596225e-05,
      "loss": 3.0111,
      "step": 423200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6477745757284665e-05,
      "loss": 2.9974,
      "step": 423300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6474543707973104e-05,
      "loss": 2.9955,
      "step": 423400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6471341658661544e-05,
      "loss": 2.9742,
      "step": 423500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.646813960934999e-05,
      "loss": 2.9891,
      "step": 423600
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6464937560038423e-05,
      "loss": 3.0006,
      "step": 423700
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.646173551072687e-05,
      "loss": 3.0078,
      "step": 423800
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.64585334614153e-05,
      "loss": 2.9625,
      "step": 423900
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.645533141210375e-05,
      "loss": 2.9933,
      "step": 424000
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.7117197513580322,
      "eval_runtime": 108.9925,
      "eval_samples_per_second": 91.749,
      "eval_steps_per_second": 5.734,
      "step": 424000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.645212936279219e-05,
      "loss": 2.9809,
      "step": 424100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.644892731348063e-05,
      "loss": 3.0155,
      "step": 424200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.644572526416907e-05,
      "loss": 3.0114,
      "step": 424300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.644252321485751e-05,
      "loss": 3.0042,
      "step": 424400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6439321165545955e-05,
      "loss": 2.9935,
      "step": 424500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.643611911623439e-05,
      "loss": 2.9783,
      "step": 424600
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6432917066922834e-05,
      "loss": 2.9942,
      "step": 424700
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6429715017611274e-05,
      "loss": 2.9867,
      "step": 424800
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6426512968299714e-05,
      "loss": 3.0101,
      "step": 424900
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6423310918988154e-05,
      "loss": 2.9994,
      "step": 425000
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.7101120948791504,
      "eval_runtime": 109.9477,
      "eval_samples_per_second": 90.952,
      "eval_steps_per_second": 5.685,
      "step": 425000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.642010886967659e-05,
      "loss": 2.9847,
      "step": 425100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.641690682036504e-05,
      "loss": 2.9953,
      "step": 425200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.641370477105347e-05,
      "loss": 2.9876,
      "step": 425300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.641050272174192e-05,
      "loss": 3.0047,
      "step": 425400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.640730067243035e-05,
      "loss": 3.0129,
      "step": 425500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.64040986231188e-05,
      "loss": 2.9825,
      "step": 425600
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.640089657380724e-05,
      "loss": 2.998,
      "step": 425700
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.639769452449568e-05,
      "loss": 2.998,
      "step": 425800
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6394492475184125e-05,
      "loss": 3.0141,
      "step": 425900
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.639129042587256e-05,
      "loss": 3.002,
      "step": 426000
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.7098171710968018,
      "eval_runtime": 109.6335,
      "eval_samples_per_second": 91.213,
      "eval_steps_per_second": 5.701,
      "step": 426000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6388088376561004e-05,
      "loss": 3.0193,
      "step": 426100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.638488632724944e-05,
      "loss": 3.0014,
      "step": 426200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6381684277937884e-05,
      "loss": 3.0124,
      "step": 426300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6378482228626324e-05,
      "loss": 2.9754,
      "step": 426400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.637528017931476e-05,
      "loss": 3.0065,
      "step": 426500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.637207813000321e-05,
      "loss": 2.9565,
      "step": 426600
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.636887608069164e-05,
      "loss": 2.9781,
      "step": 426700
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.636567403138009e-05,
      "loss": 2.9487,
      "step": 426800
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.636247198206852e-05,
      "loss": 3.0362,
      "step": 426900
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.635926993275697e-05,
      "loss": 3.0191,
      "step": 427000
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.709822177886963,
      "eval_runtime": 109.5639,
      "eval_samples_per_second": 91.271,
      "eval_steps_per_second": 5.704,
      "step": 427000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.63560678834454e-05,
      "loss": 3.0151,
      "step": 427100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.635286583413385e-05,
      "loss": 2.9833,
      "step": 427200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.634966378482229e-05,
      "loss": 3.0102,
      "step": 427300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.634646173551073e-05,
      "loss": 2.9871,
      "step": 427400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6343259686199174e-05,
      "loss": 2.9941,
      "step": 427500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.634005763688761e-05,
      "loss": 2.9723,
      "step": 427600
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6336855587576054e-05,
      "loss": 2.9995,
      "step": 427700
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.633365353826449e-05,
      "loss": 2.9953,
      "step": 427800
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.633045148895293e-05,
      "loss": 2.976,
      "step": 427900
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.632724943964137e-05,
      "loss": 3.0112,
      "step": 428000
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.7087621688842773,
      "eval_runtime": 109.4047,
      "eval_samples_per_second": 91.404,
      "eval_steps_per_second": 5.713,
      "step": 428000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.632404739032981e-05,
      "loss": 2.9733,
      "step": 428100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.632084534101826e-05,
      "loss": 2.9991,
      "step": 428200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.631764329170669e-05,
      "loss": 3.0067,
      "step": 428300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.631444124239514e-05,
      "loss": 2.9867,
      "step": 428400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.631123919308357e-05,
      "loss": 2.994,
      "step": 428500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.630803714377202e-05,
      "loss": 2.997,
      "step": 428600
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.630483509446045e-05,
      "loss": 3.0013,
      "step": 428700
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.63016330451489e-05,
      "loss": 2.9999,
      "step": 428800
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.629843099583734e-05,
      "loss": 2.9995,
      "step": 428900
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.629522894652578e-05,
      "loss": 2.9914,
      "step": 429000
    },
    {
      "epoch": 0.27,
      "eval_loss": 2.710874319076538,
      "eval_runtime": 126.8992,
      "eval_samples_per_second": 78.803,
      "eval_steps_per_second": 4.925,
      "step": 429000
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6292026897214224e-05,
      "loss": 2.984,
      "step": 429100
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6288824847902657e-05,
      "loss": 3.0094,
      "step": 429200
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.62856227985911e-05,
      "loss": 3.0053,
      "step": 429300
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.6282420749279536e-05,
      "loss": 2.9986,
      "step": 429400
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.627921869996798e-05,
      "loss": 3.0083,
      "step": 429500
    },
    {
      "epoch": 0.27,
      "learning_rate": 3.627601665065642e-05,
      "loss": 3.0043,
      "step": 429600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.627281460134486e-05,
      "loss": 2.9979,
      "step": 429700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.626961255203331e-05,
      "loss": 3.0095,
      "step": 429800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.626641050272174e-05,
      "loss": 2.9897,
      "step": 429900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.626320845341019e-05,
      "loss": 3.0072,
      "step": 430000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.7085413932800293,
      "eval_runtime": 109.235,
      "eval_samples_per_second": 91.546,
      "eval_steps_per_second": 5.722,
      "step": 430000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.626000640409862e-05,
      "loss": 2.9879,
      "step": 430100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.625680435478707e-05,
      "loss": 2.9913,
      "step": 430200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.62536023054755e-05,
      "loss": 3.0038,
      "step": 430300
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.625040025616395e-05,
      "loss": 2.9811,
      "step": 430400
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.624719820685239e-05,
      "loss": 3.0112,
      "step": 430500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6243996157540826e-05,
      "loss": 2.9895,
      "step": 430600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.624079410822927e-05,
      "loss": 2.9925,
      "step": 430700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6237592058917706e-05,
      "loss": 2.9813,
      "step": 430800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.623439000960615e-05,
      "loss": 2.9978,
      "step": 430900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6231187960294585e-05,
      "loss": 3.0036,
      "step": 431000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.708692789077759,
      "eval_runtime": 109.4592,
      "eval_samples_per_second": 91.358,
      "eval_steps_per_second": 5.71,
      "step": 431000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.622798591098303e-05,
      "loss": 2.9898,
      "step": 431100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.622478386167147e-05,
      "loss": 3.0072,
      "step": 431200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.622158181235991e-05,
      "loss": 2.999,
      "step": 431300
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.621837976304836e-05,
      "loss": 3.0168,
      "step": 431400
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.621517771373679e-05,
      "loss": 3.0176,
      "step": 431500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.621197566442524e-05,
      "loss": 2.9998,
      "step": 431600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.620877361511367e-05,
      "loss": 2.9915,
      "step": 431700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.620557156580212e-05,
      "loss": 2.9791,
      "step": 431800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.620236951649055e-05,
      "loss": 3.0017,
      "step": 431900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6199167467178996e-05,
      "loss": 2.9942,
      "step": 432000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.7089591026306152,
      "eval_runtime": 109.0705,
      "eval_samples_per_second": 91.684,
      "eval_steps_per_second": 5.73,
      "step": 432000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6195965417867436e-05,
      "loss": 2.9755,
      "step": 432100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6192763368555876e-05,
      "loss": 3.0221,
      "step": 432200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.618956131924432e-05,
      "loss": 2.9982,
      "step": 432300
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6186359269932755e-05,
      "loss": 2.9962,
      "step": 432400
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.61831572206212e-05,
      "loss": 2.971,
      "step": 432500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6179955171309635e-05,
      "loss": 3.0288,
      "step": 432600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.617675312199808e-05,
      "loss": 2.9883,
      "step": 432700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.617355107268652e-05,
      "loss": 2.9982,
      "step": 432800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.617034902337496e-05,
      "loss": 2.9643,
      "step": 432900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.616714697406341e-05,
      "loss": 2.9887,
      "step": 433000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.7074780464172363,
      "eval_runtime": 109.7986,
      "eval_samples_per_second": 91.076,
      "eval_steps_per_second": 5.692,
      "step": 433000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.616394492475184e-05,
      "loss": 2.9761,
      "step": 433100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.616074287544029e-05,
      "loss": 3.0131,
      "step": 433200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.615754082612872e-05,
      "loss": 3.0134,
      "step": 433300
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6154338776817166e-05,
      "loss": 3.0066,
      "step": 433400
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6151136727505606e-05,
      "loss": 2.9871,
      "step": 433500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6147934678194046e-05,
      "loss": 2.9797,
      "step": 433600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6144732628882485e-05,
      "loss": 2.9948,
      "step": 433700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6141530579570925e-05,
      "loss": 2.9859,
      "step": 433800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.613832853025937e-05,
      "loss": 2.9944,
      "step": 433900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6135126480947805e-05,
      "loss": 2.989,
      "step": 434000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.7087466716766357,
      "eval_runtime": 128.0123,
      "eval_samples_per_second": 78.117,
      "eval_steps_per_second": 4.882,
      "step": 434000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.613192443163625e-05,
      "loss": 2.9914,
      "step": 434100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6128722382324684e-05,
      "loss": 3.0043,
      "step": 434200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.612552033301313e-05,
      "loss": 2.9955,
      "step": 434300
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.612231828370157e-05,
      "loss": 3.0031,
      "step": 434400
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.611911623439001e-05,
      "loss": 3.0037,
      "step": 434500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.611591418507846e-05,
      "loss": 2.9975,
      "step": 434600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.611271213576689e-05,
      "loss": 2.9805,
      "step": 434700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6109510086455336e-05,
      "loss": 3.0101,
      "step": 434800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.610630803714377e-05,
      "loss": 3.0006,
      "step": 434900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6103105987832216e-05,
      "loss": 2.9859,
      "step": 435000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.7095413208007812,
      "eval_runtime": 109.749,
      "eval_samples_per_second": 91.117,
      "eval_steps_per_second": 5.695,
      "step": 435000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6099903938520655e-05,
      "loss": 3.0211,
      "step": 435100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6096701889209095e-05,
      "loss": 2.9703,
      "step": 435200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6093499839897535e-05,
      "loss": 2.987,
      "step": 435300
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6090297790585975e-05,
      "loss": 3.0101,
      "step": 435400
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.608709574127442e-05,
      "loss": 2.9925,
      "step": 435500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6083893691962854e-05,
      "loss": 3.0108,
      "step": 435600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.60806916426513e-05,
      "loss": 3.0039,
      "step": 435700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.607748959333974e-05,
      "loss": 3.0176,
      "step": 435800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.607428754402818e-05,
      "loss": 2.963,
      "step": 435900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.607108549471662e-05,
      "loss": 2.963,
      "step": 436000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.7097606658935547,
      "eval_runtime": 109.7654,
      "eval_samples_per_second": 91.103,
      "eval_steps_per_second": 5.694,
      "step": 436000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.606788344540506e-05,
      "loss": 2.9797,
      "step": 436100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.60646813960935e-05,
      "loss": 3.0085,
      "step": 436200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.606147934678194e-05,
      "loss": 2.9913,
      "step": 436300
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6058277297470385e-05,
      "loss": 3.0159,
      "step": 436400
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6055075248158825e-05,
      "loss": 2.9923,
      "step": 436500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6051873198847265e-05,
      "loss": 2.9862,
      "step": 436600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6048671149535705e-05,
      "loss": 3.0042,
      "step": 436700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6045469100224144e-05,
      "loss": 3.0072,
      "step": 436800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6042267050912584e-05,
      "loss": 2.994,
      "step": 436900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6039065001601024e-05,
      "loss": 2.9883,
      "step": 437000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.710663318634033,
      "eval_runtime": 109.4795,
      "eval_samples_per_second": 91.341,
      "eval_steps_per_second": 5.709,
      "step": 437000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.603586295228947e-05,
      "loss": 2.9964,
      "step": 437100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.60326609029779e-05,
      "loss": 2.9681,
      "step": 437200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.602945885366635e-05,
      "loss": 2.9906,
      "step": 437300
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.602625680435479e-05,
      "loss": 2.9932,
      "step": 437400
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.602305475504323e-05,
      "loss": 2.9864,
      "step": 437500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.601985270573167e-05,
      "loss": 2.9831,
      "step": 437600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.601665065642011e-05,
      "loss": 2.9876,
      "step": 437700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.601344860710855e-05,
      "loss": 2.9975,
      "step": 437800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.601024655779699e-05,
      "loss": 3.0057,
      "step": 437900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6007044508485435e-05,
      "loss": 2.998,
      "step": 438000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.7090940475463867,
      "eval_runtime": 109.3008,
      "eval_samples_per_second": 91.491,
      "eval_steps_per_second": 5.718,
      "step": 438000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6003842459173875e-05,
      "loss": 2.9782,
      "step": 438100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.6000640409862314e-05,
      "loss": 3.0061,
      "step": 438200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.5997438360550754e-05,
      "loss": 3.0125,
      "step": 438300
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.5994236311239194e-05,
      "loss": 3.0278,
      "step": 438400
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.5991034261927634e-05,
      "loss": 3.017,
      "step": 438500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.598783221261607e-05,
      "loss": 2.9632,
      "step": 438600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.598463016330452e-05,
      "loss": 3.0117,
      "step": 438700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.598142811399296e-05,
      "loss": 2.9733,
      "step": 438800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.59782260646814e-05,
      "loss": 2.9851,
      "step": 438900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.597502401536984e-05,
      "loss": 2.9818,
      "step": 439000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.7073731422424316,
      "eval_runtime": 126.1365,
      "eval_samples_per_second": 79.279,
      "eval_steps_per_second": 4.955,
      "step": 439000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.597182196605828e-05,
      "loss": 3.0173,
      "step": 439100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.596861991674672e-05,
      "loss": 2.9644,
      "step": 439200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.596541786743516e-05,
      "loss": 2.9875,
      "step": 439300
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.59622158181236e-05,
      "loss": 3.0068,
      "step": 439400
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.595901376881204e-05,
      "loss": 2.992,
      "step": 439500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.5955811719500484e-05,
      "loss": 2.9926,
      "step": 439600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.5952609670188924e-05,
      "loss": 3.0062,
      "step": 439700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.5949407620877364e-05,
      "loss": 3.0093,
      "step": 439800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.5946205571565803e-05,
      "loss": 3.0086,
      "step": 439900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.594300352225424e-05,
      "loss": 2.9804,
      "step": 440000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.7071495056152344,
      "eval_runtime": 109.3003,
      "eval_samples_per_second": 91.491,
      "eval_steps_per_second": 5.718,
      "step": 440000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.593980147294268e-05,
      "loss": 2.9824,
      "step": 440100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.593659942363112e-05,
      "loss": 3.0146,
      "step": 440200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.593339737431957e-05,
      "loss": 2.9863,
      "step": 440300
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.593019532500801e-05,
      "loss": 2.9989,
      "step": 440400
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.592699327569645e-05,
      "loss": 3.0166,
      "step": 440500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.592379122638489e-05,
      "loss": 2.991,
      "step": 440600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.592058917707333e-05,
      "loss": 2.9772,
      "step": 440700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.591738712776177e-05,
      "loss": 3.0039,
      "step": 440800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.591418507845021e-05,
      "loss": 2.9844,
      "step": 440900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.591098302913865e-05,
      "loss": 3.0155,
      "step": 441000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.707533597946167,
      "eval_runtime": 109.1926,
      "eval_samples_per_second": 91.581,
      "eval_steps_per_second": 5.724,
      "step": 441000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.5907780979827094e-05,
      "loss": 2.9789,
      "step": 441100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.5904578930515534e-05,
      "loss": 3.008,
      "step": 441200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.590137688120397e-05,
      "loss": 3.0078,
      "step": 441300
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.589817483189241e-05,
      "loss": 2.9795,
      "step": 441400
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.589497278258085e-05,
      "loss": 2.9831,
      "step": 441500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.589177073326929e-05,
      "loss": 2.9662,
      "step": 441600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.588856868395773e-05,
      "loss": 3.0023,
      "step": 441700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.588536663464617e-05,
      "loss": 2.9837,
      "step": 441800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.588216458533462e-05,
      "loss": 2.9951,
      "step": 441900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.587896253602306e-05,
      "loss": 2.9892,
      "step": 442000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.7072486877441406,
      "eval_runtime": 109.4086,
      "eval_samples_per_second": 91.4,
      "eval_steps_per_second": 5.713,
      "step": 442000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.58757604867115e-05,
      "loss": 2.9876,
      "step": 442100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.587255843739994e-05,
      "loss": 2.9838,
      "step": 442200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.586935638808838e-05,
      "loss": 3.007,
      "step": 442300
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.586615433877682e-05,
      "loss": 2.9828,
      "step": 442400
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.586295228946526e-05,
      "loss": 2.9765,
      "step": 442500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.58597502401537e-05,
      "loss": 3.0032,
      "step": 442600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.585654819084214e-05,
      "loss": 3.0165,
      "step": 442700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.585334614153058e-05,
      "loss": 2.9845,
      "step": 442800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.585014409221902e-05,
      "loss": 2.9895,
      "step": 442900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.584694204290746e-05,
      "loss": 2.9998,
      "step": 443000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.7065634727478027,
      "eval_runtime": 110.104,
      "eval_samples_per_second": 90.823,
      "eval_steps_per_second": 5.676,
      "step": 443000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.58437399935959e-05,
      "loss": 2.9826,
      "step": 443100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.584053794428434e-05,
      "loss": 2.982,
      "step": 443200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.583733589497278e-05,
      "loss": 2.9803,
      "step": 443300
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.583413384566123e-05,
      "loss": 3.0204,
      "step": 443400
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.583093179634967e-05,
      "loss": 2.9855,
      "step": 443500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.582772974703811e-05,
      "loss": 3.0401,
      "step": 443600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.582452769772655e-05,
      "loss": 2.9814,
      "step": 443700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.582132564841499e-05,
      "loss": 2.9966,
      "step": 443800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.581812359910343e-05,
      "loss": 2.9774,
      "step": 443900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.5814921549791867e-05,
      "loss": 3.0186,
      "step": 444000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.707742214202881,
      "eval_runtime": 126.4377,
      "eval_samples_per_second": 79.09,
      "eval_steps_per_second": 4.943,
      "step": 444000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.581171950048031e-05,
      "loss": 2.9969,
      "step": 444100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.5808517451168746e-05,
      "loss": 2.9654,
      "step": 444200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.580531540185719e-05,
      "loss": 2.9921,
      "step": 444300
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.580211335254563e-05,
      "loss": 2.9776,
      "step": 444400
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.579891130323407e-05,
      "loss": 2.9893,
      "step": 444500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.579570925392251e-05,
      "loss": 3.0182,
      "step": 444600
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.579250720461095e-05,
      "loss": 3.0046,
      "step": 444700
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.578930515529939e-05,
      "loss": 3.0269,
      "step": 444800
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.578610310598783e-05,
      "loss": 2.9993,
      "step": 444900
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.578290105667628e-05,
      "loss": 3.0073,
      "step": 445000
    },
    {
      "epoch": 0.28,
      "eval_loss": 2.709017753601074,
      "eval_runtime": 109.7223,
      "eval_samples_per_second": 91.139,
      "eval_steps_per_second": 5.696,
      "step": 445000
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.577969900736472e-05,
      "loss": 3.0093,
      "step": 445100
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.577649695805316e-05,
      "loss": 2.9856,
      "step": 445200
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.57732949087416e-05,
      "loss": 2.9791,
      "step": 445300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5770092859430036e-05,
      "loss": 2.9891,
      "step": 445400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5766890810118476e-05,
      "loss": 2.9753,
      "step": 445500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5763688760806916e-05,
      "loss": 3.0002,
      "step": 445600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.576048671149536e-05,
      "loss": 2.984,
      "step": 445700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5757284662183795e-05,
      "loss": 3.0043,
      "step": 445800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.575408261287224e-05,
      "loss": 2.9672,
      "step": 445900
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.575088056356068e-05,
      "loss": 3.0191,
      "step": 446000
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.709226131439209,
      "eval_runtime": 109.351,
      "eval_samples_per_second": 91.449,
      "eval_steps_per_second": 5.716,
      "step": 446000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.574767851424912e-05,
      "loss": 3.0005,
      "step": 446100
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.574447646493756e-05,
      "loss": 2.9809,
      "step": 446200
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5741274415626e-05,
      "loss": 3.0006,
      "step": 446300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.573807236631445e-05,
      "loss": 3.0027,
      "step": 446400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.573487031700288e-05,
      "loss": 2.9731,
      "step": 446500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.573166826769133e-05,
      "loss": 3.0044,
      "step": 446600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.572846621837977e-05,
      "loss": 2.9895,
      "step": 446700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5725264169068206e-05,
      "loss": 2.9886,
      "step": 446800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5722062119756646e-05,
      "loss": 2.9877,
      "step": 446900
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5718860070445086e-05,
      "loss": 2.9735,
      "step": 447000
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.70713210105896,
      "eval_runtime": 109.4083,
      "eval_samples_per_second": 91.401,
      "eval_steps_per_second": 5.713,
      "step": 447000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5715658021133526e-05,
      "loss": 2.9795,
      "step": 447100
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5712455971821965e-05,
      "loss": 2.968,
      "step": 447200
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.570925392251041e-05,
      "loss": 2.9858,
      "step": 447300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5706051873198845e-05,
      "loss": 3.0024,
      "step": 447400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.570284982388729e-05,
      "loss": 2.9768,
      "step": 447500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.569964777457573e-05,
      "loss": 3.0099,
      "step": 447600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.569644572526417e-05,
      "loss": 2.9772,
      "step": 447700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.569324367595261e-05,
      "loss": 2.9948,
      "step": 447800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.569004162664105e-05,
      "loss": 3.0235,
      "step": 447900
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.56868395773295e-05,
      "loss": 3.0059,
      "step": 448000
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.7047386169433594,
      "eval_runtime": 109.2368,
      "eval_samples_per_second": 91.544,
      "eval_steps_per_second": 5.722,
      "step": 448000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.568363752801793e-05,
      "loss": 2.9872,
      "step": 448100
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5680435478706376e-05,
      "loss": 3.0157,
      "step": 448200
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5677233429394816e-05,
      "loss": 2.9726,
      "step": 448300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5674031380083256e-05,
      "loss": 3.0029,
      "step": 448400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5670829330771695e-05,
      "loss": 3.0096,
      "step": 448500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5667627281460135e-05,
      "loss": 2.9783,
      "step": 448600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.566442523214858e-05,
      "loss": 2.9943,
      "step": 448700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5661223182837015e-05,
      "loss": 3.0005,
      "step": 448800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.565802113352546e-05,
      "loss": 2.975,
      "step": 448900
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5654819084213894e-05,
      "loss": 2.9729,
      "step": 449000
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.7090981006622314,
      "eval_runtime": 127.6184,
      "eval_samples_per_second": 78.359,
      "eval_steps_per_second": 4.897,
      "step": 449000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.565161703490234e-05,
      "loss": 2.9673,
      "step": 449100
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.564841498559078e-05,
      "loss": 2.994,
      "step": 449200
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.564521293627922e-05,
      "loss": 2.9649,
      "step": 449300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.564201088696766e-05,
      "loss": 3.0077,
      "step": 449400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.56388088376561e-05,
      "loss": 2.989,
      "step": 449500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5635606788344546e-05,
      "loss": 2.9808,
      "step": 449600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.563240473903298e-05,
      "loss": 3.0114,
      "step": 449700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5629202689721426e-05,
      "loss": 2.9807,
      "step": 449800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5626000640409865e-05,
      "loss": 2.9926,
      "step": 449900
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5622798591098305e-05,
      "loss": 2.9994,
      "step": 450000
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.7079014778137207,
      "eval_runtime": 109.3652,
      "eval_samples_per_second": 91.437,
      "eval_steps_per_second": 5.715,
      "step": 450000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5619596541786745e-05,
      "loss": 3.0009,
      "step": 450100
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5616394492475185e-05,
      "loss": 2.9951,
      "step": 450200
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.561319244316363e-05,
      "loss": 2.9912,
      "step": 450300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5609990393852064e-05,
      "loss": 2.9877,
      "step": 450400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.560678834454051e-05,
      "loss": 2.9488,
      "step": 450500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5603586295228944e-05,
      "loss": 2.9903,
      "step": 450600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.560038424591739e-05,
      "loss": 2.9917,
      "step": 450700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.559718219660583e-05,
      "loss": 2.9703,
      "step": 450800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.559398014729427e-05,
      "loss": 3.016,
      "step": 450900
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5590778097982716e-05,
      "loss": 3.0041,
      "step": 451000
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.7081172466278076,
      "eval_runtime": 109.4466,
      "eval_samples_per_second": 91.369,
      "eval_steps_per_second": 5.711,
      "step": 451000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.558757604867115e-05,
      "loss": 2.9918,
      "step": 451100
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5584373999359596e-05,
      "loss": 2.9653,
      "step": 451200
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.558117195004803e-05,
      "loss": 2.972,
      "step": 451300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5577969900736475e-05,
      "loss": 2.9797,
      "step": 451400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5574767851424915e-05,
      "loss": 2.9843,
      "step": 451500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5571565802113355e-05,
      "loss": 2.9946,
      "step": 451600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5568363752801794e-05,
      "loss": 2.9941,
      "step": 451700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5565161703490234e-05,
      "loss": 3.0038,
      "step": 451800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.556195965417868e-05,
      "loss": 2.9814,
      "step": 451900
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5558757604867113e-05,
      "loss": 2.9792,
      "step": 452000
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.7073111534118652,
      "eval_runtime": 109.8021,
      "eval_samples_per_second": 91.073,
      "eval_steps_per_second": 5.692,
      "step": 452000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.555555555555556e-05,
      "loss": 2.9663,
      "step": 452100
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.555235350624399e-05,
      "loss": 2.9762,
      "step": 452200
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.554915145693244e-05,
      "loss": 3.0115,
      "step": 452300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.554594940762088e-05,
      "loss": 2.9851,
      "step": 452400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.554274735830932e-05,
      "loss": 3.0025,
      "step": 452500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5539545308997765e-05,
      "loss": 2.9931,
      "step": 452600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.55363432596862e-05,
      "loss": 2.9562,
      "step": 452700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5533141210374645e-05,
      "loss": 2.9843,
      "step": 452800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.552993916106308e-05,
      "loss": 2.972,
      "step": 452900
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5526737111751524e-05,
      "loss": 2.999,
      "step": 453000
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.7079315185546875,
      "eval_runtime": 109.9406,
      "eval_samples_per_second": 90.958,
      "eval_steps_per_second": 5.685,
      "step": 453000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5523535062439964e-05,
      "loss": 2.9719,
      "step": 453100
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5520333013128404e-05,
      "loss": 2.9902,
      "step": 453200
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.551713096381685e-05,
      "loss": 2.9798,
      "step": 453300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.551392891450528e-05,
      "loss": 2.9859,
      "step": 453400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.551072686519373e-05,
      "loss": 2.9929,
      "step": 453500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.550752481588216e-05,
      "loss": 2.984,
      "step": 453600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.550432276657061e-05,
      "loss": 2.976,
      "step": 453700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.550112071725904e-05,
      "loss": 3.0195,
      "step": 453800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.549791866794749e-05,
      "loss": 3.0007,
      "step": 453900
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.549471661863593e-05,
      "loss": 2.9948,
      "step": 454000
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.7065906524658203,
      "eval_runtime": 109.0294,
      "eval_samples_per_second": 91.718,
      "eval_steps_per_second": 5.732,
      "step": 454000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.549151456932437e-05,
      "loss": 2.9673,
      "step": 454100
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5488312520012815e-05,
      "loss": 2.9803,
      "step": 454200
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.548511047070125e-05,
      "loss": 2.9764,
      "step": 454300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5481908421389694e-05,
      "loss": 3.0028,
      "step": 454400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.547870637207813e-05,
      "loss": 2.9851,
      "step": 454500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5475504322766574e-05,
      "loss": 2.9938,
      "step": 454600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5472302273455014e-05,
      "loss": 3.0284,
      "step": 454700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.546910022414345e-05,
      "loss": 3.0047,
      "step": 454800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.546589817483189e-05,
      "loss": 2.9978,
      "step": 454900
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.546269612552033e-05,
      "loss": 2.9636,
      "step": 455000
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.7064974308013916,
      "eval_runtime": 109.0564,
      "eval_samples_per_second": 91.696,
      "eval_steps_per_second": 5.731,
      "step": 455000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.545949407620878e-05,
      "loss": 2.979,
      "step": 455100
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.545629202689721e-05,
      "loss": 3.0067,
      "step": 455200
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.545308997758566e-05,
      "loss": 3.0118,
      "step": 455300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.544988792827409e-05,
      "loss": 2.9584,
      "step": 455400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.544668587896254e-05,
      "loss": 2.9923,
      "step": 455500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.544348382965098e-05,
      "loss": 2.9769,
      "step": 455600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.544028178033942e-05,
      "loss": 3.0084,
      "step": 455700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5437079731027864e-05,
      "loss": 3.0096,
      "step": 455800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.54338776817163e-05,
      "loss": 2.9537,
      "step": 455900
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5430675632404744e-05,
      "loss": 2.9904,
      "step": 456000
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.703334093093872,
      "eval_runtime": 109.4088,
      "eval_samples_per_second": 91.4,
      "eval_steps_per_second": 5.713,
      "step": 456000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.542747358309318e-05,
      "loss": 2.9908,
      "step": 456100
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.542427153378162e-05,
      "loss": 2.9797,
      "step": 456200
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.542106948447006e-05,
      "loss": 2.9665,
      "step": 456300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.54178674351585e-05,
      "loss": 2.9797,
      "step": 456400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.541466538584694e-05,
      "loss": 2.9797,
      "step": 456500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.541146333653538e-05,
      "loss": 2.9736,
      "step": 456600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.540826128722383e-05,
      "loss": 2.9479,
      "step": 456700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.540505923791226e-05,
      "loss": 2.9743,
      "step": 456800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.540185718860071e-05,
      "loss": 2.9657,
      "step": 456900
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.539865513928914e-05,
      "loss": 2.9911,
      "step": 457000
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.704658269882202,
      "eval_runtime": 109.3672,
      "eval_samples_per_second": 91.435,
      "eval_steps_per_second": 5.715,
      "step": 457000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.539545308997759e-05,
      "loss": 2.9902,
      "step": 457100
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.539225104066603e-05,
      "loss": 2.9832,
      "step": 457200
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.538904899135447e-05,
      "loss": 3.0089,
      "step": 457300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5385846942042914e-05,
      "loss": 2.9858,
      "step": 457400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5382644892731347e-05,
      "loss": 2.9949,
      "step": 457500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.537944284341979e-05,
      "loss": 2.9666,
      "step": 457600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5376240794108226e-05,
      "loss": 3.013,
      "step": 457700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.537303874479667e-05,
      "loss": 2.9896,
      "step": 457800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.536983669548511e-05,
      "loss": 2.9958,
      "step": 457900
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.536663464617355e-05,
      "loss": 2.9931,
      "step": 458000
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.7062416076660156,
      "eval_runtime": 109.1255,
      "eval_samples_per_second": 91.638,
      "eval_steps_per_second": 5.727,
      "step": 458000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.536343259686199e-05,
      "loss": 2.995,
      "step": 458100
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.536023054755043e-05,
      "loss": 2.9895,
      "step": 458200
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.535702849823888e-05,
      "loss": 2.9868,
      "step": 458300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.535382644892731e-05,
      "loss": 2.9978,
      "step": 458400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.535062439961576e-05,
      "loss": 2.9919,
      "step": 458500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.53474223503042e-05,
      "loss": 2.9947,
      "step": 458600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.534422030099264e-05,
      "loss": 2.9858,
      "step": 458700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.534101825168108e-05,
      "loss": 3.0024,
      "step": 458800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5337816202369516e-05,
      "loss": 2.9932,
      "step": 458900
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.533461415305796e-05,
      "loss": 2.9807,
      "step": 459000
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.703824043273926,
      "eval_runtime": 128.711,
      "eval_samples_per_second": 77.693,
      "eval_steps_per_second": 4.856,
      "step": 459000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5331412103746396e-05,
      "loss": 2.9446,
      "step": 459100
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.532821005443484e-05,
      "loss": 3.0021,
      "step": 459200
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5325008005123275e-05,
      "loss": 3.0011,
      "step": 459300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.532180595581172e-05,
      "loss": 2.9946,
      "step": 459400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.531860390650016e-05,
      "loss": 2.9965,
      "step": 459500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.53154018571886e-05,
      "loss": 3.0055,
      "step": 459600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.531219980787704e-05,
      "loss": 2.9714,
      "step": 459700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.530899775856548e-05,
      "loss": 2.9758,
      "step": 459800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.530579570925393e-05,
      "loss": 2.9986,
      "step": 459900
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.530259365994236e-05,
      "loss": 3.0032,
      "step": 460000
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.7023680210113525,
      "eval_runtime": 109.8663,
      "eval_samples_per_second": 91.02,
      "eval_steps_per_second": 5.689,
      "step": 460000
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.529939161063081e-05,
      "loss": 3.0166,
      "step": 460100
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5296189561319247e-05,
      "loss": 2.9951,
      "step": 460200
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5292987512007686e-05,
      "loss": 2.9705,
      "step": 460300
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5289785462696126e-05,
      "loss": 2.9679,
      "step": 460400
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5286583413384566e-05,
      "loss": 2.9711,
      "step": 460500
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.528338136407301e-05,
      "loss": 2.9907,
      "step": 460600
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.5280179314761445e-05,
      "loss": 3.005,
      "step": 460700
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.527697726544989e-05,
      "loss": 2.9837,
      "step": 460800
    },
    {
      "epoch": 0.29,
      "learning_rate": 3.527377521613833e-05,
      "loss": 2.9616,
      "step": 460900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.527057316682677e-05,
      "loss": 2.9756,
      "step": 461000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.7047388553619385,
      "eval_runtime": 109.735,
      "eval_samples_per_second": 91.129,
      "eval_steps_per_second": 5.696,
      "step": 461000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.526737111751521e-05,
      "loss": 3.0102,
      "step": 461100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.526416906820365e-05,
      "loss": 2.9793,
      "step": 461200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.526096701889209e-05,
      "loss": 2.9948,
      "step": 461300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.525776496958053e-05,
      "loss": 2.9593,
      "step": 461400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.525456292026898e-05,
      "loss": 2.998,
      "step": 461500
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.525136087095741e-05,
      "loss": 2.9776,
      "step": 461600
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5248158821645856e-05,
      "loss": 2.9793,
      "step": 461700
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5244956772334296e-05,
      "loss": 2.9758,
      "step": 461800
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5241754723022736e-05,
      "loss": 2.9885,
      "step": 461900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5238552673711175e-05,
      "loss": 2.9844,
      "step": 462000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.7062535285949707,
      "eval_runtime": 109.3053,
      "eval_samples_per_second": 91.487,
      "eval_steps_per_second": 5.718,
      "step": 462000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5235350624399615e-05,
      "loss": 2.9748,
      "step": 462100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.523214857508806e-05,
      "loss": 2.9827,
      "step": 462200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5228946525776495e-05,
      "loss": 2.9628,
      "step": 462300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.522574447646494e-05,
      "loss": 2.9871,
      "step": 462400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.522254242715338e-05,
      "loss": 2.968,
      "step": 462500
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.521934037784182e-05,
      "loss": 2.9859,
      "step": 462600
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.521613832853026e-05,
      "loss": 2.9963,
      "step": 462700
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.52129362792187e-05,
      "loss": 3.0,
      "step": 462800
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.520973422990714e-05,
      "loss": 2.9919,
      "step": 462900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.520653218059558e-05,
      "loss": 3.0001,
      "step": 463000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.7057528495788574,
      "eval_runtime": 109.5294,
      "eval_samples_per_second": 91.3,
      "eval_steps_per_second": 5.706,
      "step": 463000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5203330131284026e-05,
      "loss": 2.9756,
      "step": 463100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5200128081972466e-05,
      "loss": 2.9597,
      "step": 463200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5196926032660906e-05,
      "loss": 2.9803,
      "step": 463300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5193723983349345e-05,
      "loss": 2.9807,
      "step": 463400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5190521934037785e-05,
      "loss": 2.9671,
      "step": 463500
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5187319884726225e-05,
      "loss": 2.9934,
      "step": 463600
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5184117835414665e-05,
      "loss": 2.9844,
      "step": 463700
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.518091578610311e-05,
      "loss": 2.9846,
      "step": 463800
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.517771373679155e-05,
      "loss": 2.9602,
      "step": 463900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.517451168747999e-05,
      "loss": 2.9845,
      "step": 464000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.7038910388946533,
      "eval_runtime": 127.989,
      "eval_samples_per_second": 78.132,
      "eval_steps_per_second": 4.883,
      "step": 464000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.517130963816843e-05,
      "loss": 2.9856,
      "step": 464100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.516810758885687e-05,
      "loss": 2.9793,
      "step": 464200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.516490553954531e-05,
      "loss": 3.0016,
      "step": 464300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.516170349023375e-05,
      "loss": 3.0063,
      "step": 464400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.515850144092219e-05,
      "loss": 2.9663,
      "step": 464500
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.515529939161063e-05,
      "loss": 3.0079,
      "step": 464600
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5152097342299075e-05,
      "loss": 2.9972,
      "step": 464700
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5148895292987515e-05,
      "loss": 3.0088,
      "step": 464800
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5145693243675955e-05,
      "loss": 3.0073,
      "step": 464900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5142491194364395e-05,
      "loss": 3.0063,
      "step": 465000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.70666241645813,
      "eval_runtime": 109.1846,
      "eval_samples_per_second": 91.588,
      "eval_steps_per_second": 5.724,
      "step": 465000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5139289145052834e-05,
      "loss": 2.9785,
      "step": 465100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5136087095741274e-05,
      "loss": 3.0162,
      "step": 465200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5132885046429714e-05,
      "loss": 3.0053,
      "step": 465300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.512968299711816e-05,
      "loss": 2.9875,
      "step": 465400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.51264809478066e-05,
      "loss": 2.9815,
      "step": 465500
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.512327889849504e-05,
      "loss": 3.0031,
      "step": 465600
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.512007684918348e-05,
      "loss": 2.9951,
      "step": 465700
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.511687479987192e-05,
      "loss": 2.9958,
      "step": 465800
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.511367275056036e-05,
      "loss": 2.9969,
      "step": 465900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.51104707012488e-05,
      "loss": 2.9852,
      "step": 466000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.707252264022827,
      "eval_runtime": 109.2366,
      "eval_samples_per_second": 91.544,
      "eval_steps_per_second": 5.722,
      "step": 466000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.510726865193724e-05,
      "loss": 2.987,
      "step": 466100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5104066602625685e-05,
      "loss": 2.9875,
      "step": 466200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5100864553314125e-05,
      "loss": 2.9964,
      "step": 466300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5097662504002565e-05,
      "loss": 2.9818,
      "step": 466400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5094460454691004e-05,
      "loss": 2.9986,
      "step": 466500
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5091258405379444e-05,
      "loss": 2.9968,
      "step": 466600
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5088056356067884e-05,
      "loss": 2.994,
      "step": 466700
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5084854306756324e-05,
      "loss": 2.9793,
      "step": 466800
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.508165225744476e-05,
      "loss": 2.9612,
      "step": 466900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.507845020813321e-05,
      "loss": 2.985,
      "step": 467000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.7058708667755127,
      "eval_runtime": 109.4116,
      "eval_samples_per_second": 91.398,
      "eval_steps_per_second": 5.712,
      "step": 467000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.507524815882165e-05,
      "loss": 2.9939,
      "step": 467100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.507204610951009e-05,
      "loss": 2.9844,
      "step": 467200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.506884406019853e-05,
      "loss": 2.9699,
      "step": 467300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.506564201088697e-05,
      "loss": 2.9649,
      "step": 467400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.506243996157541e-05,
      "loss": 3.0023,
      "step": 467500
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.505923791226385e-05,
      "loss": 2.9811,
      "step": 467600
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.505603586295229e-05,
      "loss": 2.9763,
      "step": 467700
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5052833813640734e-05,
      "loss": 3.0122,
      "step": 467800
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5049631764329174e-05,
      "loss": 2.9967,
      "step": 467900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5046429715017614e-05,
      "loss": 2.9915,
      "step": 468000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.7068190574645996,
      "eval_runtime": 110.069,
      "eval_samples_per_second": 90.852,
      "eval_steps_per_second": 5.678,
      "step": 468000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5043227665706054e-05,
      "loss": 2.9865,
      "step": 468100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5040025616394493e-05,
      "loss": 2.9548,
      "step": 468200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.503682356708293e-05,
      "loss": 2.9645,
      "step": 468300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.503362151777137e-05,
      "loss": 2.9727,
      "step": 468400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.503041946845982e-05,
      "loss": 2.9907,
      "step": 468500
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.502721741914826e-05,
      "loss": 2.9941,
      "step": 468600
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.50240153698367e-05,
      "loss": 2.9877,
      "step": 468700
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.502081332052514e-05,
      "loss": 2.9926,
      "step": 468800
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.501761127121358e-05,
      "loss": 2.9939,
      "step": 468900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.501440922190202e-05,
      "loss": 2.9658,
      "step": 469000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.705523729324341,
      "eval_runtime": 128.4164,
      "eval_samples_per_second": 77.872,
      "eval_steps_per_second": 4.867,
      "step": 469000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.501120717259046e-05,
      "loss": 3.0024,
      "step": 469100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.50080051232789e-05,
      "loss": 2.9674,
      "step": 469200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.500480307396734e-05,
      "loss": 2.9815,
      "step": 469300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.5001601024655784e-05,
      "loss": 2.9963,
      "step": 469400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.4998398975344224e-05,
      "loss": 2.9967,
      "step": 469500
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.499519692603266e-05,
      "loss": 2.9973,
      "step": 469600
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.49919948767211e-05,
      "loss": 2.956,
      "step": 469700
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.498879282740954e-05,
      "loss": 2.9615,
      "step": 469800
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.498559077809798e-05,
      "loss": 2.9944,
      "step": 469900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.498238872878642e-05,
      "loss": 3.0006,
      "step": 470000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.7042880058288574,
      "eval_runtime": 109.5042,
      "eval_samples_per_second": 91.321,
      "eval_steps_per_second": 5.708,
      "step": 470000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.497918667947487e-05,
      "loss": 2.9885,
      "step": 470100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.497598463016331e-05,
      "loss": 2.9915,
      "step": 470200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.497278258085175e-05,
      "loss": 2.9736,
      "step": 470300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.496958053154019e-05,
      "loss": 2.9828,
      "step": 470400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.496637848222863e-05,
      "loss": 3.0228,
      "step": 470500
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.496317643291707e-05,
      "loss": 3.0207,
      "step": 470600
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.495997438360551e-05,
      "loss": 3.0164,
      "step": 470700
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.4956772334293954e-05,
      "loss": 2.9874,
      "step": 470800
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.495357028498239e-05,
      "loss": 2.9915,
      "step": 470900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.495036823567083e-05,
      "loss": 2.9891,
      "step": 471000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.7042458057403564,
      "eval_runtime": 109.0173,
      "eval_samples_per_second": 91.729,
      "eval_steps_per_second": 5.733,
      "step": 471000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.494716618635927e-05,
      "loss": 2.9883,
      "step": 471100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.494396413704771e-05,
      "loss": 2.9969,
      "step": 471200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.494076208773615e-05,
      "loss": 2.9959,
      "step": 471300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.493756003842459e-05,
      "loss": 3.0076,
      "step": 471400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.493435798911303e-05,
      "loss": 2.9785,
      "step": 471500
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.493115593980147e-05,
      "loss": 2.9844,
      "step": 471600
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.492795389048992e-05,
      "loss": 2.97,
      "step": 471700
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.492475184117836e-05,
      "loss": 2.993,
      "step": 471800
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.49215497918668e-05,
      "loss": 2.9509,
      "step": 471900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.491834774255524e-05,
      "loss": 3.0046,
      "step": 472000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.7050678730010986,
      "eval_runtime": 109.2389,
      "eval_samples_per_second": 91.542,
      "eval_steps_per_second": 5.721,
      "step": 472000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.491514569324368e-05,
      "loss": 2.9768,
      "step": 472100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.491194364393212e-05,
      "loss": 2.9947,
      "step": 472200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.4908741594620557e-05,
      "loss": 3.0025,
      "step": 472300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.4905539545309e-05,
      "loss": 2.9726,
      "step": 472400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.4902337495997436e-05,
      "loss": 3.0064,
      "step": 472500
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.489913544668588e-05,
      "loss": 3.0097,
      "step": 472600
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.489593339737432e-05,
      "loss": 2.972,
      "step": 472700
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.489273134806276e-05,
      "loss": 2.9579,
      "step": 472800
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.48895292987512e-05,
      "loss": 2.9949,
      "step": 472900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.488632724943964e-05,
      "loss": 2.9808,
      "step": 473000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.7034125328063965,
      "eval_runtime": 110.9918,
      "eval_samples_per_second": 90.097,
      "eval_steps_per_second": 5.631,
      "step": 473000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.488312520012809e-05,
      "loss": 2.9889,
      "step": 473100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.487992315081652e-05,
      "loss": 2.9911,
      "step": 473200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.487672110150497e-05,
      "loss": 2.9816,
      "step": 473300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.487351905219341e-05,
      "loss": 2.9684,
      "step": 473400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.487031700288185e-05,
      "loss": 2.9933,
      "step": 473500
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.486711495357029e-05,
      "loss": 3.0133,
      "step": 473600
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.4863912904258726e-05,
      "loss": 2.9793,
      "step": 473700
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.486071085494717e-05,
      "loss": 3.0031,
      "step": 473800
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.4857508805635606e-05,
      "loss": 2.9665,
      "step": 473900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.485430675632405e-05,
      "loss": 2.9788,
      "step": 474000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.704502582550049,
      "eval_runtime": 120.2572,
      "eval_samples_per_second": 83.155,
      "eval_steps_per_second": 5.197,
      "step": 474000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.4851104707012485e-05,
      "loss": 2.9854,
      "step": 474100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.484790265770093e-05,
      "loss": 2.9959,
      "step": 474200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.484470060838937e-05,
      "loss": 2.9927,
      "step": 474300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.484149855907781e-05,
      "loss": 2.9775,
      "step": 474400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.483829650976625e-05,
      "loss": 2.9728,
      "step": 474500
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.483509446045469e-05,
      "loss": 2.9658,
      "step": 474600
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.483189241114314e-05,
      "loss": 2.9695,
      "step": 474700
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.482869036183157e-05,
      "loss": 2.9745,
      "step": 474800
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.482548831252002e-05,
      "loss": 2.9823,
      "step": 474900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.482228626320846e-05,
      "loss": 2.9554,
      "step": 475000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.705043315887451,
      "eval_runtime": 120.8832,
      "eval_samples_per_second": 82.724,
      "eval_steps_per_second": 5.17,
      "step": 475000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.4819084213896896e-05,
      "loss": 2.9822,
      "step": 475100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.4815882164585336e-05,
      "loss": 2.9999,
      "step": 475200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.4812680115273776e-05,
      "loss": 2.9284,
      "step": 475300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.480947806596222e-05,
      "loss": 2.972,
      "step": 475400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.4806276016650655e-05,
      "loss": 3.0147,
      "step": 475500
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.48030739673391e-05,
      "loss": 3.0143,
      "step": 475600
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.4799871918027535e-05,
      "loss": 2.9882,
      "step": 475700
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.479666986871598e-05,
      "loss": 3.0136,
      "step": 475800
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.479346781940442e-05,
      "loss": 2.9908,
      "step": 475900
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.479026577009286e-05,
      "loss": 2.9924,
      "step": 476000
    },
    {
      "epoch": 0.3,
      "eval_loss": 2.7021644115448,
      "eval_runtime": 121.2004,
      "eval_samples_per_second": 82.508,
      "eval_steps_per_second": 5.157,
      "step": 476000
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.478706372078131e-05,
      "loss": 2.9803,
      "step": 476100
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.478386167146974e-05,
      "loss": 2.9896,
      "step": 476200
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.478065962215819e-05,
      "loss": 2.9767,
      "step": 476300
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.477745757284662e-05,
      "loss": 2.9955,
      "step": 476400
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.4774255523535066e-05,
      "loss": 2.9844,
      "step": 476500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4771053474223506e-05,
      "loss": 2.9668,
      "step": 476600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4767851424911946e-05,
      "loss": 2.9844,
      "step": 476700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4764649375600385e-05,
      "loss": 3.003,
      "step": 476800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4761447326288825e-05,
      "loss": 3.014,
      "step": 476900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.475824527697727e-05,
      "loss": 2.9788,
      "step": 477000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.70169734954834,
      "eval_runtime": 139.4307,
      "eval_samples_per_second": 71.72,
      "eval_steps_per_second": 4.483,
      "step": 477000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4755043227665705e-05,
      "loss": 2.9809,
      "step": 477100
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.475184117835415e-05,
      "loss": 2.9638,
      "step": 477200
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4748639129042584e-05,
      "loss": 2.9861,
      "step": 477300
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.474543707973103e-05,
      "loss": 2.9922,
      "step": 477400
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.474223503041947e-05,
      "loss": 2.9707,
      "step": 477500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.473903298110791e-05,
      "loss": 2.965,
      "step": 477600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.473583093179636e-05,
      "loss": 2.9861,
      "step": 477700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.473262888248479e-05,
      "loss": 2.987,
      "step": 477800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4729426833173236e-05,
      "loss": 2.9875,
      "step": 477900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.472622478386167e-05,
      "loss": 3.0105,
      "step": 478000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.700936794281006,
      "eval_runtime": 120.8064,
      "eval_samples_per_second": 82.777,
      "eval_steps_per_second": 5.174,
      "step": 478000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4723022734550116e-05,
      "loss": 3.0044,
      "step": 478100
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4719820685238555e-05,
      "loss": 2.9886,
      "step": 478200
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4716618635926995e-05,
      "loss": 2.9784,
      "step": 478300
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4713416586615435e-05,
      "loss": 3.0098,
      "step": 478400
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4710214537303875e-05,
      "loss": 2.9681,
      "step": 478500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.470701248799232e-05,
      "loss": 2.9886,
      "step": 478600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4703810438680754e-05,
      "loss": 2.9856,
      "step": 478700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.47006083893692e-05,
      "loss": 2.996,
      "step": 478800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4697406340057634e-05,
      "loss": 2.9904,
      "step": 478900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.469420429074608e-05,
      "loss": 3.0293,
      "step": 479000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.6991958618164062,
      "eval_runtime": 120.9389,
      "eval_samples_per_second": 82.686,
      "eval_steps_per_second": 5.168,
      "step": 479000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.469100224143452e-05,
      "loss": 2.9948,
      "step": 479100
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.468780019212296e-05,
      "loss": 3.0021,
      "step": 479200
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4684598142811406e-05,
      "loss": 2.9895,
      "step": 479300
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.468139609349984e-05,
      "loss": 2.9742,
      "step": 479400
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4678194044188286e-05,
      "loss": 2.9914,
      "step": 479500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.467499199487672e-05,
      "loss": 2.9712,
      "step": 479600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4671789945565165e-05,
      "loss": 2.9973,
      "step": 479700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4668587896253605e-05,
      "loss": 3.0053,
      "step": 479800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4665385846942044e-05,
      "loss": 2.9978,
      "step": 479900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4662183797630484e-05,
      "loss": 2.999,
      "step": 480000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.6992626190185547,
      "eval_runtime": 119.8723,
      "eval_samples_per_second": 83.422,
      "eval_steps_per_second": 5.214,
      "step": 480000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4658981748318924e-05,
      "loss": 2.9516,
      "step": 480100
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.465577969900737e-05,
      "loss": 2.9706,
      "step": 480200
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4652577649695803e-05,
      "loss": 2.9652,
      "step": 480300
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.464937560038425e-05,
      "loss": 2.9725,
      "step": 480400
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.464617355107268e-05,
      "loss": 2.9892,
      "step": 480500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.464297150176113e-05,
      "loss": 2.9891,
      "step": 480600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.463976945244957e-05,
      "loss": 2.9825,
      "step": 480700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.463656740313801e-05,
      "loss": 2.9549,
      "step": 480800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4633365353826455e-05,
      "loss": 2.976,
      "step": 480900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.463016330451489e-05,
      "loss": 2.996,
      "step": 481000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.7004551887512207,
      "eval_runtime": 120.2703,
      "eval_samples_per_second": 83.146,
      "eval_steps_per_second": 5.197,
      "step": 481000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4626961255203335e-05,
      "loss": 2.981,
      "step": 481100
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.462375920589177e-05,
      "loss": 2.9717,
      "step": 481200
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4620557156580214e-05,
      "loss": 2.996,
      "step": 481300
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4617355107268654e-05,
      "loss": 2.9754,
      "step": 481400
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4614153057957094e-05,
      "loss": 3.0134,
      "step": 481500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4610951008645534e-05,
      "loss": 3.0125,
      "step": 481600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.460774895933397e-05,
      "loss": 2.9905,
      "step": 481700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.460454691002242e-05,
      "loss": 2.9998,
      "step": 481800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.460134486071085e-05,
      "loss": 2.9843,
      "step": 481900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.45981428113993e-05,
      "loss": 2.9709,
      "step": 482000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.70105242729187,
      "eval_runtime": 119.9553,
      "eval_samples_per_second": 83.364,
      "eval_steps_per_second": 5.21,
      "step": 482000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.459494076208773e-05,
      "loss": 2.998,
      "step": 482100
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.459173871277618e-05,
      "loss": 2.9792,
      "step": 482200
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.458853666346462e-05,
      "loss": 2.9786,
      "step": 482300
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.458533461415306e-05,
      "loss": 2.9811,
      "step": 482400
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4582132564841505e-05,
      "loss": 2.9922,
      "step": 482500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.457893051552994e-05,
      "loss": 2.9883,
      "step": 482600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4575728466218384e-05,
      "loss": 2.9711,
      "step": 482700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.457252641690682e-05,
      "loss": 2.9925,
      "step": 482800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4569324367595264e-05,
      "loss": 2.9921,
      "step": 482900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4566122318283703e-05,
      "loss": 2.9752,
      "step": 483000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.699043035507202,
      "eval_runtime": 119.3722,
      "eval_samples_per_second": 83.772,
      "eval_steps_per_second": 5.236,
      "step": 483000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.456292026897214e-05,
      "loss": 2.9814,
      "step": 483100
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.455971821966058e-05,
      "loss": 2.9539,
      "step": 483200
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.455651617034902e-05,
      "loss": 2.9643,
      "step": 483300
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.455331412103747e-05,
      "loss": 3.0026,
      "step": 483400
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.45501120717259e-05,
      "loss": 2.9827,
      "step": 483500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.454691002241435e-05,
      "loss": 3.0023,
      "step": 483600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.454370797310279e-05,
      "loss": 2.9841,
      "step": 483700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.454050592379123e-05,
      "loss": 2.9592,
      "step": 483800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.453730387447967e-05,
      "loss": 3.0026,
      "step": 483900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.453410182516811e-05,
      "loss": 2.9665,
      "step": 484000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.699373960494995,
      "eval_runtime": 118.6868,
      "eval_samples_per_second": 84.255,
      "eval_steps_per_second": 5.266,
      "step": 484000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4530899775856554e-05,
      "loss": 2.9808,
      "step": 484100
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.452769772654499e-05,
      "loss": 2.9879,
      "step": 484200
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4524495677233434e-05,
      "loss": 2.9956,
      "step": 484300
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.452129362792187e-05,
      "loss": 2.9928,
      "step": 484400
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.451809157861031e-05,
      "loss": 2.9904,
      "step": 484500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.451488952929875e-05,
      "loss": 2.9889,
      "step": 484600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.451168747998719e-05,
      "loss": 2.9947,
      "step": 484700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.450848543067563e-05,
      "loss": 3.0216,
      "step": 484800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.450528338136407e-05,
      "loss": 3.0045,
      "step": 484900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.450208133205252e-05,
      "loss": 2.9863,
      "step": 485000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.6997878551483154,
      "eval_runtime": 139.1489,
      "eval_samples_per_second": 71.865,
      "eval_steps_per_second": 4.492,
      "step": 485000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.449887928274095e-05,
      "loss": 2.9836,
      "step": 485100
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.44956772334294e-05,
      "loss": 3.0025,
      "step": 485200
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.449247518411784e-05,
      "loss": 2.9567,
      "step": 485300
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.448927313480628e-05,
      "loss": 3.0039,
      "step": 485400
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.448607108549472e-05,
      "loss": 2.9997,
      "step": 485500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.448286903618316e-05,
      "loss": 2.9687,
      "step": 485600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4479666986871604e-05,
      "loss": 2.996,
      "step": 485700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4476464937560037e-05,
      "loss": 2.9753,
      "step": 485800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.447326288824848e-05,
      "loss": 2.959,
      "step": 485900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.447006083893692e-05,
      "loss": 2.9828,
      "step": 486000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.698928117752075,
      "eval_runtime": 118.9081,
      "eval_samples_per_second": 84.099,
      "eval_steps_per_second": 5.256,
      "step": 486000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.446685878962536e-05,
      "loss": 2.9942,
      "step": 486100
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.44636567403138e-05,
      "loss": 3.0108,
      "step": 486200
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.446045469100224e-05,
      "loss": 2.9775,
      "step": 486300
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.445725264169068e-05,
      "loss": 3.0055,
      "step": 486400
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.445405059237912e-05,
      "loss": 2.9616,
      "step": 486500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.445084854306757e-05,
      "loss": 2.9705,
      "step": 486600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4447646493756e-05,
      "loss": 2.9839,
      "step": 486700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.444444444444445e-05,
      "loss": 2.9769,
      "step": 486800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.444124239513289e-05,
      "loss": 2.9855,
      "step": 486900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.443804034582133e-05,
      "loss": 3.005,
      "step": 487000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.7013635635375977,
      "eval_runtime": 117.5932,
      "eval_samples_per_second": 85.039,
      "eval_steps_per_second": 5.315,
      "step": 487000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.443483829650977e-05,
      "loss": 2.9973,
      "step": 487100
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4431636247198206e-05,
      "loss": 2.9858,
      "step": 487200
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.442843419788665e-05,
      "loss": 2.9763,
      "step": 487300
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4425232148575086e-05,
      "loss": 2.9527,
      "step": 487400
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.442203009926353e-05,
      "loss": 2.9708,
      "step": 487500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.441882804995197e-05,
      "loss": 2.9793,
      "step": 487600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.441562600064041e-05,
      "loss": 2.9889,
      "step": 487700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.441242395132885e-05,
      "loss": 2.9907,
      "step": 487800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.440922190201729e-05,
      "loss": 2.9625,
      "step": 487900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.440601985270573e-05,
      "loss": 2.9665,
      "step": 488000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.7013356685638428,
      "eval_runtime": 116.9775,
      "eval_samples_per_second": 85.487,
      "eval_steps_per_second": 5.343,
      "step": 488000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.440281780339417e-05,
      "loss": 2.9593,
      "step": 488100
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.439961575408262e-05,
      "loss": 2.9832,
      "step": 488200
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.439641370477106e-05,
      "loss": 2.9595,
      "step": 488300
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.43932116554595e-05,
      "loss": 2.9951,
      "step": 488400
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4390009606147937e-05,
      "loss": 2.9733,
      "step": 488500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4386807556836376e-05,
      "loss": 3.0036,
      "step": 488600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4383605507524816e-05,
      "loss": 2.978,
      "step": 488700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4380403458213256e-05,
      "loss": 3.0065,
      "step": 488800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.43772014089017e-05,
      "loss": 2.988,
      "step": 488900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4373999359590135e-05,
      "loss": 2.9862,
      "step": 489000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.7001631259918213,
      "eval_runtime": 117.0836,
      "eval_samples_per_second": 85.409,
      "eval_steps_per_second": 5.338,
      "step": 489000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.437079731027858e-05,
      "loss": 2.9822,
      "step": 489100
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.436759526096702e-05,
      "loss": 2.9394,
      "step": 489200
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.436439321165546e-05,
      "loss": 2.96,
      "step": 489300
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.43611911623439e-05,
      "loss": 2.9837,
      "step": 489400
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.435798911303234e-05,
      "loss": 2.9913,
      "step": 489500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.435478706372078e-05,
      "loss": 2.9779,
      "step": 489600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.435158501440922e-05,
      "loss": 2.9945,
      "step": 489700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.434838296509767e-05,
      "loss": 2.9618,
      "step": 489800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4345180915786106e-05,
      "loss": 2.9797,
      "step": 489900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4341978866474546e-05,
      "loss": 2.989,
      "step": 490000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.7004354000091553,
      "eval_runtime": 116.9312,
      "eval_samples_per_second": 85.52,
      "eval_steps_per_second": 5.345,
      "step": 490000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4338776817162986e-05,
      "loss": 2.951,
      "step": 490100
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4335574767851426e-05,
      "loss": 2.9966,
      "step": 490200
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4332372718539865e-05,
      "loss": 2.9861,
      "step": 490300
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4329170669228305e-05,
      "loss": 2.9971,
      "step": 490400
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.432596861991675e-05,
      "loss": 2.9975,
      "step": 490500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.432276657060519e-05,
      "loss": 2.9886,
      "step": 490600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.431956452129363e-05,
      "loss": 2.9706,
      "step": 490700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.431636247198207e-05,
      "loss": 2.9648,
      "step": 490800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.431316042267051e-05,
      "loss": 2.9843,
      "step": 490900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.430995837335895e-05,
      "loss": 2.981,
      "step": 491000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.6995768547058105,
      "eval_runtime": 115.9431,
      "eval_samples_per_second": 86.249,
      "eval_steps_per_second": 5.391,
      "step": 491000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.430675632404739e-05,
      "loss": 3.0003,
      "step": 491100
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.430355427473583e-05,
      "loss": 2.9613,
      "step": 491200
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.430035222542427e-05,
      "loss": 2.9977,
      "step": 491300
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4297150176112716e-05,
      "loss": 2.9556,
      "step": 491400
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4293948126801156e-05,
      "loss": 2.9958,
      "step": 491500
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4290746077489596e-05,
      "loss": 2.9706,
      "step": 491600
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4287544028178035e-05,
      "loss": 3.0083,
      "step": 491700
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4284341978866475e-05,
      "loss": 2.9753,
      "step": 491800
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4281139929554915e-05,
      "loss": 2.9955,
      "step": 491900
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.4277937880243355e-05,
      "loss": 2.9813,
      "step": 492000
    },
    {
      "epoch": 0.31,
      "eval_loss": 2.700209856033325,
      "eval_runtime": 115.5124,
      "eval_samples_per_second": 86.571,
      "eval_steps_per_second": 5.411,
      "step": 492000
    },
    {
      "epoch": 0.31,
      "learning_rate": 3.42747358309318e-05,
      "loss": 2.9767,
      "step": 492100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.427153378162024e-05,
      "loss": 2.9738,
      "step": 492200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.426833173230868e-05,
      "loss": 2.9862,
      "step": 492300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.426512968299712e-05,
      "loss": 2.9721,
      "step": 492400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.426192763368556e-05,
      "loss": 2.9879,
      "step": 492500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4258725584374e-05,
      "loss": 2.9462,
      "step": 492600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.425552353506244e-05,
      "loss": 2.9862,
      "step": 492700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.425232148575088e-05,
      "loss": 2.9802,
      "step": 492800
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4249119436439326e-05,
      "loss": 2.9753,
      "step": 492900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4245917387127765e-05,
      "loss": 2.9727,
      "step": 493000
    },
    {
      "epoch": 0.32,
      "eval_loss": 2.7002105712890625,
      "eval_runtime": 136.0692,
      "eval_samples_per_second": 73.492,
      "eval_steps_per_second": 4.593,
      "step": 493000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4242715337816205e-05,
      "loss": 2.991,
      "step": 493100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4239513288504645e-05,
      "loss": 2.9614,
      "step": 493200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4236311239193085e-05,
      "loss": 2.9775,
      "step": 493300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4233109189881524e-05,
      "loss": 2.9706,
      "step": 493400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4229907140569964e-05,
      "loss": 2.9557,
      "step": 493500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.422670509125841e-05,
      "loss": 2.9521,
      "step": 493600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.422350304194685e-05,
      "loss": 2.9976,
      "step": 493700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.422030099263529e-05,
      "loss": 2.991,
      "step": 493800
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.421709894332373e-05,
      "loss": 2.9772,
      "step": 493900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.421389689401217e-05,
      "loss": 2.9555,
      "step": 494000
    },
    {
      "epoch": 0.32,
      "eval_loss": 2.7009117603302,
      "eval_runtime": 115.8749,
      "eval_samples_per_second": 86.3,
      "eval_steps_per_second": 5.394,
      "step": 494000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.421069484470061e-05,
      "loss": 2.9742,
      "step": 494100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.420749279538905e-05,
      "loss": 2.9658,
      "step": 494200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.420429074607749e-05,
      "loss": 2.9602,
      "step": 494300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.420108869676593e-05,
      "loss": 2.9765,
      "step": 494400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4197886647454375e-05,
      "loss": 2.9891,
      "step": 494500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4194684598142815e-05,
      "loss": 2.94,
      "step": 494600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4191482548831255e-05,
      "loss": 2.9705,
      "step": 494700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4188280499519694e-05,
      "loss": 2.9954,
      "step": 494800
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4185078450208134e-05,
      "loss": 2.9835,
      "step": 494900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4181876400896574e-05,
      "loss": 2.9507,
      "step": 495000
    },
    {
      "epoch": 0.32,
      "eval_loss": 2.6974098682403564,
      "eval_runtime": 115.2128,
      "eval_samples_per_second": 86.796,
      "eval_steps_per_second": 5.425,
      "step": 495000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4178674351585014e-05,
      "loss": 2.9865,
      "step": 495100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.417547230227346e-05,
      "loss": 2.9616,
      "step": 495200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.41722702529619e-05,
      "loss": 2.957,
      "step": 495300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.416906820365034e-05,
      "loss": 2.986,
      "step": 495400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.416586615433878e-05,
      "loss": 2.9832,
      "step": 495500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.416266410502722e-05,
      "loss": 2.971,
      "step": 495600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.415946205571566e-05,
      "loss": 2.9991,
      "step": 495700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.41562600064041e-05,
      "loss": 2.9826,
      "step": 495800
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4153057957092545e-05,
      "loss": 2.9965,
      "step": 495900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.414985590778098e-05,
      "loss": 2.9666,
      "step": 496000
    },
    {
      "epoch": 0.32,
      "eval_loss": 2.6976656913757324,
      "eval_runtime": 135.7671,
      "eval_samples_per_second": 73.656,
      "eval_steps_per_second": 4.603,
      "step": 496000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4146653858469424e-05,
      "loss": 2.9919,
      "step": 496100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4143451809157864e-05,
      "loss": 2.9681,
      "step": 496200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4140249759846304e-05,
      "loss": 3.0072,
      "step": 496300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4137047710534744e-05,
      "loss": 2.9683,
      "step": 496400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4133845661223183e-05,
      "loss": 2.9817,
      "step": 496500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.413064361191162e-05,
      "loss": 2.9782,
      "step": 496600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.412744156260006e-05,
      "loss": 2.974,
      "step": 496700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.412423951328851e-05,
      "loss": 2.9662,
      "step": 496800
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.412103746397695e-05,
      "loss": 2.9378,
      "step": 496900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.411783541466539e-05,
      "loss": 2.9585,
      "step": 497000
    },
    {
      "epoch": 0.32,
      "eval_loss": 2.7000467777252197,
      "eval_runtime": 115.0603,
      "eval_samples_per_second": 86.911,
      "eval_steps_per_second": 5.432,
      "step": 497000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.411463336535383e-05,
      "loss": 2.9539,
      "step": 497100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.411143131604227e-05,
      "loss": 2.9636,
      "step": 497200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.410822926673071e-05,
      "loss": 2.9822,
      "step": 497300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.410502721741915e-05,
      "loss": 2.9872,
      "step": 497400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4101825168107594e-05,
      "loss": 2.9918,
      "step": 497500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.409862311879603e-05,
      "loss": 2.975,
      "step": 497600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4095421069484474e-05,
      "loss": 2.9354,
      "step": 497700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4092219020172914e-05,
      "loss": 2.9592,
      "step": 497800
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.408901697086135e-05,
      "loss": 2.9699,
      "step": 497900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.408581492154979e-05,
      "loss": 2.9725,
      "step": 498000
    },
    {
      "epoch": 0.32,
      "eval_loss": 2.7000303268432617,
      "eval_runtime": 114.722,
      "eval_samples_per_second": 87.167,
      "eval_steps_per_second": 5.448,
      "step": 498000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.408261287223823e-05,
      "loss": 2.993,
      "step": 498100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.407941082292668e-05,
      "loss": 3.0031,
      "step": 498200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.407620877361511e-05,
      "loss": 2.9906,
      "step": 498300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.407300672430356e-05,
      "loss": 2.9691,
      "step": 498400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4069804674992e-05,
      "loss": 2.9635,
      "step": 498500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.406660262568044e-05,
      "loss": 2.9676,
      "step": 498600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.406340057636888e-05,
      "loss": 2.9802,
      "step": 498700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.406019852705732e-05,
      "loss": 3.0085,
      "step": 498800
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.405699647774576e-05,
      "loss": 2.9704,
      "step": 498900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.40537944284342e-05,
      "loss": 2.9661,
      "step": 499000
    },
    {
      "epoch": 0.32,
      "eval_loss": 2.6994948387145996,
      "eval_runtime": 114.1145,
      "eval_samples_per_second": 87.631,
      "eval_steps_per_second": 5.477,
      "step": 499000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4050592379122644e-05,
      "loss": 2.9977,
      "step": 499100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.404739032981108e-05,
      "loss": 2.9799,
      "step": 499200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.404418828049952e-05,
      "loss": 2.9854,
      "step": 499300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.404098623118796e-05,
      "loss": 2.9725,
      "step": 499400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.40377841818764e-05,
      "loss": 2.9502,
      "step": 499500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.403458213256484e-05,
      "loss": 2.9826,
      "step": 499600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.403138008325328e-05,
      "loss": 2.9739,
      "step": 499700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.402817803394173e-05,
      "loss": 2.9768,
      "step": 499800
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.402497598463016e-05,
      "loss": 2.997,
      "step": 499900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.402177393531861e-05,
      "loss": 2.9638,
      "step": 500000
    },
    {
      "epoch": 0.32,
      "eval_loss": 2.701573371887207,
      "eval_runtime": 114.5606,
      "eval_samples_per_second": 87.29,
      "eval_steps_per_second": 5.456,
      "step": 500000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.401857188600705e-05,
      "loss": 2.9799,
      "step": 500100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.401536983669549e-05,
      "loss": 2.9885,
      "step": 500200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.401216778738393e-05,
      "loss": 2.9688,
      "step": 500300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.400896573807237e-05,
      "loss": 2.964,
      "step": 500400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4005763688760814e-05,
      "loss": 2.9666,
      "step": 500500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.4002561639449247e-05,
      "loss": 2.9917,
      "step": 500600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.399935959013769e-05,
      "loss": 2.9731,
      "step": 500700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3996157540826126e-05,
      "loss": 2.9878,
      "step": 500800
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.399295549151457e-05,
      "loss": 2.9826,
      "step": 500900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.398975344220301e-05,
      "loss": 2.9617,
      "step": 501000
    },
    {
      "epoch": 0.32,
      "eval_loss": 2.7004215717315674,
      "eval_runtime": 114.5809,
      "eval_samples_per_second": 87.275,
      "eval_steps_per_second": 5.455,
      "step": 501000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.398655139289145e-05,
      "loss": 2.9966,
      "step": 501100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.39833493435799e-05,
      "loss": 2.9623,
      "step": 501200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.398014729426833e-05,
      "loss": 2.9664,
      "step": 501300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.397694524495678e-05,
      "loss": 2.985,
      "step": 501400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.397374319564521e-05,
      "loss": 3.0015,
      "step": 501500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.397054114633366e-05,
      "loss": 2.985,
      "step": 501600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.39673390970221e-05,
      "loss": 2.9854,
      "step": 501700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.396413704771054e-05,
      "loss": 2.9773,
      "step": 501800
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.396093499839898e-05,
      "loss": 2.9594,
      "step": 501900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3957732949087416e-05,
      "loss": 2.9807,
      "step": 502000
    },
    {
      "epoch": 0.32,
      "eval_loss": 2.701042890548706,
      "eval_runtime": 114.7922,
      "eval_samples_per_second": 87.114,
      "eval_steps_per_second": 5.445,
      "step": 502000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.395453089977586e-05,
      "loss": 2.9751,
      "step": 502100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3951328850464296e-05,
      "loss": 2.9556,
      "step": 502200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.394812680115274e-05,
      "loss": 2.9849,
      "step": 502300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3944924751841175e-05,
      "loss": 2.9606,
      "step": 502400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.394172270252962e-05,
      "loss": 2.9643,
      "step": 502500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.393852065321806e-05,
      "loss": 2.9602,
      "step": 502600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.39353186039065e-05,
      "loss": 2.97,
      "step": 502700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.393211655459495e-05,
      "loss": 2.9683,
      "step": 502800
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.392891450528338e-05,
      "loss": 2.9433,
      "step": 502900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.392571245597183e-05,
      "loss": 2.9756,
      "step": 503000
    },
    {
      "epoch": 0.32,
      "eval_loss": 2.700615167617798,
      "eval_runtime": 114.7908,
      "eval_samples_per_second": 87.115,
      "eval_steps_per_second": 5.445,
      "step": 503000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.392251040666026e-05,
      "loss": 2.9712,
      "step": 503100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.391930835734871e-05,
      "loss": 2.9748,
      "step": 503200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.391610630803715e-05,
      "loss": 2.971,
      "step": 503300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3912904258725586e-05,
      "loss": 2.9941,
      "step": 503400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3909702209414026e-05,
      "loss": 2.9805,
      "step": 503500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3906500160102466e-05,
      "loss": 2.9932,
      "step": 503600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.390329811079091e-05,
      "loss": 2.9677,
      "step": 503700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3900096061479345e-05,
      "loss": 2.9897,
      "step": 503800
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.389689401216779e-05,
      "loss": 2.958,
      "step": 503900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3893691962856225e-05,
      "loss": 2.9937,
      "step": 504000
    },
    {
      "epoch": 0.32,
      "eval_loss": 2.6996469497680664,
      "eval_runtime": 132.7545,
      "eval_samples_per_second": 75.327,
      "eval_steps_per_second": 4.708,
      "step": 504000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.389048991354467e-05,
      "loss": 3.0065,
      "step": 504100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.388728786423311e-05,
      "loss": 2.9888,
      "step": 504200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.388408581492155e-05,
      "loss": 2.9936,
      "step": 504300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.388088376561e-05,
      "loss": 2.9848,
      "step": 504400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.387768171629843e-05,
      "loss": 2.9718,
      "step": 504500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.387447966698688e-05,
      "loss": 2.9711,
      "step": 504600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.387127761767531e-05,
      "loss": 2.9965,
      "step": 504700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3868075568363756e-05,
      "loss": 3.0023,
      "step": 504800
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3864873519052196e-05,
      "loss": 2.9923,
      "step": 504900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3861671469740636e-05,
      "loss": 2.9633,
      "step": 505000
    },
    {
      "epoch": 0.32,
      "eval_loss": 2.7002615928649902,
      "eval_runtime": 109.8675,
      "eval_samples_per_second": 91.019,
      "eval_steps_per_second": 5.689,
      "step": 505000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3858469420429075e-05,
      "loss": 2.986,
      "step": 505100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3855267371117515e-05,
      "loss": 2.9558,
      "step": 505200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.385206532180596e-05,
      "loss": 2.9799,
      "step": 505300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3848863272494395e-05,
      "loss": 2.9964,
      "step": 505400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.384566122318284e-05,
      "loss": 2.9921,
      "step": 505500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3842459173871274e-05,
      "loss": 3.0179,
      "step": 505600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.383925712455972e-05,
      "loss": 2.9813,
      "step": 505700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.383605507524816e-05,
      "loss": 2.9929,
      "step": 505800
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.38328530259366e-05,
      "loss": 2.9799,
      "step": 505900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.382965097662505e-05,
      "loss": 2.9794,
      "step": 506000
    },
    {
      "epoch": 0.32,
      "eval_loss": 2.6990132331848145,
      "eval_runtime": 109.2572,
      "eval_samples_per_second": 91.527,
      "eval_steps_per_second": 5.72,
      "step": 506000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.382644892731348e-05,
      "loss": 2.9809,
      "step": 506100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3823246878001926e-05,
      "loss": 3.0148,
      "step": 506200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.382004482869036e-05,
      "loss": 2.9666,
      "step": 506300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3816842779378806e-05,
      "loss": 2.993,
      "step": 506400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.381364073006724e-05,
      "loss": 2.9878,
      "step": 506500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3810438680755685e-05,
      "loss": 2.9707,
      "step": 506600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3807236631444125e-05,
      "loss": 2.9935,
      "step": 506700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3804034582132565e-05,
      "loss": 2.9865,
      "step": 506800
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.380083253282101e-05,
      "loss": 3.0006,
      "step": 506900
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3797630483509444e-05,
      "loss": 2.9606,
      "step": 507000
    },
    {
      "epoch": 0.32,
      "eval_loss": 2.697902202606201,
      "eval_runtime": 128.2924,
      "eval_samples_per_second": 77.947,
      "eval_steps_per_second": 4.872,
      "step": 507000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.379442843419789e-05,
      "loss": 2.9803,
      "step": 507100
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3791226384886324e-05,
      "loss": 3.008,
      "step": 507200
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.378802433557477e-05,
      "loss": 2.9843,
      "step": 507300
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.378482228626321e-05,
      "loss": 2.9893,
      "step": 507400
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.378162023695165e-05,
      "loss": 2.9864,
      "step": 507500
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3778418187640096e-05,
      "loss": 3.0019,
      "step": 507600
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.377521613832853e-05,
      "loss": 2.9773,
      "step": 507700
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.3772014089016976e-05,
      "loss": 2.9916,
      "step": 507800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.376881203970541e-05,
      "loss": 2.9581,
      "step": 507900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3765609990393855e-05,
      "loss": 3.0016,
      "step": 508000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.6988649368286133,
      "eval_runtime": 109.3865,
      "eval_samples_per_second": 91.419,
      "eval_steps_per_second": 5.714,
      "step": 508000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3762407941082295e-05,
      "loss": 2.964,
      "step": 508100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3759205891770734e-05,
      "loss": 2.9912,
      "step": 508200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3756003842459174e-05,
      "loss": 2.9554,
      "step": 508300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3752801793147614e-05,
      "loss": 2.9847,
      "step": 508400
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.374959974383606e-05,
      "loss": 2.9731,
      "step": 508500
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3746397694524493e-05,
      "loss": 2.9675,
      "step": 508600
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.374319564521294e-05,
      "loss": 2.991,
      "step": 508700
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.373999359590137e-05,
      "loss": 2.9742,
      "step": 508800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.373679154658982e-05,
      "loss": 2.9918,
      "step": 508900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.373358949727826e-05,
      "loss": 3.0087,
      "step": 509000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.6963632106781006,
      "eval_runtime": 109.7842,
      "eval_samples_per_second": 91.088,
      "eval_steps_per_second": 5.693,
      "step": 509000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.37303874479667e-05,
      "loss": 3.0023,
      "step": 509100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3727185398655145e-05,
      "loss": 2.9866,
      "step": 509200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.372398334934358e-05,
      "loss": 2.9728,
      "step": 509300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3720781300032025e-05,
      "loss": 2.9345,
      "step": 509400
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.371757925072046e-05,
      "loss": 2.9883,
      "step": 509500
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3714377201408904e-05,
      "loss": 2.9658,
      "step": 509600
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3711175152097344e-05,
      "loss": 2.9664,
      "step": 509700
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3707973102785784e-05,
      "loss": 2.9555,
      "step": 509800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3704771053474224e-05,
      "loss": 2.9718,
      "step": 509900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.370156900416266e-05,
      "loss": 2.9739,
      "step": 510000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.6985952854156494,
      "eval_runtime": 130.7879,
      "eval_samples_per_second": 76.46,
      "eval_steps_per_second": 4.779,
      "step": 510000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.369836695485111e-05,
      "loss": 2.9911,
      "step": 510100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.369516490553954e-05,
      "loss": 2.9891,
      "step": 510200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.369196285622799e-05,
      "loss": 2.981,
      "step": 510300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.368876080691643e-05,
      "loss": 2.978,
      "step": 510400
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.368555875760487e-05,
      "loss": 2.9558,
      "step": 510500
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.368235670829331e-05,
      "loss": 2.9737,
      "step": 510600
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.367915465898175e-05,
      "loss": 3.0162,
      "step": 510700
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3675952609670195e-05,
      "loss": 2.9748,
      "step": 510800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.367275056035863e-05,
      "loss": 2.9845,
      "step": 510900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3669548511047074e-05,
      "loss": 3.011,
      "step": 511000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.696922540664673,
      "eval_runtime": 109.8138,
      "eval_samples_per_second": 91.063,
      "eval_steps_per_second": 5.691,
      "step": 511000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3666346461735514e-05,
      "loss": 2.9968,
      "step": 511100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3663144412423954e-05,
      "loss": 2.9896,
      "step": 511200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3659942363112393e-05,
      "loss": 2.9794,
      "step": 511300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.365674031380083e-05,
      "loss": 2.9808,
      "step": 511400
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.365353826448927e-05,
      "loss": 2.9567,
      "step": 511500
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.365033621517771e-05,
      "loss": 2.9883,
      "step": 511600
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.364713416586616e-05,
      "loss": 2.9751,
      "step": 511700
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.364393211655459e-05,
      "loss": 2.983,
      "step": 511800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.364073006724304e-05,
      "loss": 2.9823,
      "step": 511900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.363752801793148e-05,
      "loss": 2.9961,
      "step": 512000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.6993277072906494,
      "eval_runtime": 109.8716,
      "eval_samples_per_second": 91.015,
      "eval_steps_per_second": 5.688,
      "step": 512000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.363432596861992e-05,
      "loss": 2.9701,
      "step": 512100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.363112391930836e-05,
      "loss": 2.9605,
      "step": 512200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.36279218699968e-05,
      "loss": 2.9579,
      "step": 512300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3624719820685244e-05,
      "loss": 2.9654,
      "step": 512400
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.362151777137368e-05,
      "loss": 2.9602,
      "step": 512500
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3618315722062124e-05,
      "loss": 2.9641,
      "step": 512600
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.361511367275056e-05,
      "loss": 2.9826,
      "step": 512700
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3611911623439e-05,
      "loss": 2.9531,
      "step": 512800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.360870957412744e-05,
      "loss": 2.9777,
      "step": 512900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.360550752481588e-05,
      "loss": 2.9877,
      "step": 513000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.699265718460083,
      "eval_runtime": 109.7065,
      "eval_samples_per_second": 91.152,
      "eval_steps_per_second": 5.697,
      "step": 513000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.360230547550432e-05,
      "loss": 3.0196,
      "step": 513100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.359910342619276e-05,
      "loss": 2.9618,
      "step": 513200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.359590137688121e-05,
      "loss": 2.9603,
      "step": 513300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.359269932756965e-05,
      "loss": 2.9661,
      "step": 513400
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.358949727825809e-05,
      "loss": 2.9846,
      "step": 513500
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.358629522894653e-05,
      "loss": 3.0048,
      "step": 513600
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.358309317963497e-05,
      "loss": 2.9576,
      "step": 513700
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.357989113032341e-05,
      "loss": 2.997,
      "step": 513800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.357668908101185e-05,
      "loss": 2.9978,
      "step": 513900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3573487031700294e-05,
      "loss": 2.9911,
      "step": 514000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.69899320602417,
      "eval_runtime": 109.3237,
      "eval_samples_per_second": 91.471,
      "eval_steps_per_second": 5.717,
      "step": 514000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3570284982388727e-05,
      "loss": 2.9729,
      "step": 514100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.356708293307717e-05,
      "loss": 2.9842,
      "step": 514200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.356388088376561e-05,
      "loss": 2.9684,
      "step": 514300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.356067883445405e-05,
      "loss": 2.9918,
      "step": 514400
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.355747678514249e-05,
      "loss": 2.9982,
      "step": 514500
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.355427473583093e-05,
      "loss": 2.9768,
      "step": 514600
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.355107268651937e-05,
      "loss": 2.9667,
      "step": 514700
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.354787063720781e-05,
      "loss": 2.9927,
      "step": 514800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.354466858789626e-05,
      "loss": 2.9887,
      "step": 514900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.35414665385847e-05,
      "loss": 2.9936,
      "step": 515000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.699075698852539,
      "eval_runtime": 109.3367,
      "eval_samples_per_second": 91.461,
      "eval_steps_per_second": 5.716,
      "step": 515000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.353826448927314e-05,
      "loss": 2.9638,
      "step": 515100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.353506243996158e-05,
      "loss": 2.9987,
      "step": 515200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.353186039065002e-05,
      "loss": 2.9697,
      "step": 515300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.352865834133846e-05,
      "loss": 2.9913,
      "step": 515400
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3525456292026896e-05,
      "loss": 2.9761,
      "step": 515500
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.352225424271534e-05,
      "loss": 2.9812,
      "step": 515600
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.351905219340378e-05,
      "loss": 2.9625,
      "step": 515700
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.351585014409222e-05,
      "loss": 2.9998,
      "step": 515800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.351264809478066e-05,
      "loss": 2.9814,
      "step": 515900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.35094460454691e-05,
      "loss": 2.9717,
      "step": 516000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.6982357501983643,
      "eval_runtime": 109.0276,
      "eval_samples_per_second": 91.72,
      "eval_steps_per_second": 5.732,
      "step": 516000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.350624399615754e-05,
      "loss": 2.988,
      "step": 516100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.350304194684598e-05,
      "loss": 2.9649,
      "step": 516200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.349983989753442e-05,
      "loss": 2.9732,
      "step": 516300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.349663784822286e-05,
      "loss": 2.9761,
      "step": 516400
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.349343579891131e-05,
      "loss": 2.9687,
      "step": 516500
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.349023374959975e-05,
      "loss": 3.0035,
      "step": 516600
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.348703170028819e-05,
      "loss": 2.9621,
      "step": 516700
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3483829650976627e-05,
      "loss": 2.9889,
      "step": 516800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3480627601665066e-05,
      "loss": 2.9673,
      "step": 516900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3477425552353506e-05,
      "loss": 2.9765,
      "step": 517000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.696007490158081,
      "eval_runtime": 109.4966,
      "eval_samples_per_second": 91.327,
      "eval_steps_per_second": 5.708,
      "step": 517000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3474223503041946e-05,
      "loss": 3.0057,
      "step": 517100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.347102145373039e-05,
      "loss": 2.9663,
      "step": 517200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.346781940441883e-05,
      "loss": 2.938,
      "step": 517300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.346461735510727e-05,
      "loss": 2.9581,
      "step": 517400
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.346141530579571e-05,
      "loss": 2.9758,
      "step": 517500
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.345821325648415e-05,
      "loss": 2.9664,
      "step": 517600
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.345501120717259e-05,
      "loss": 2.9772,
      "step": 517700
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.345180915786103e-05,
      "loss": 2.962,
      "step": 517800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.344860710854947e-05,
      "loss": 2.9834,
      "step": 517900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.344540505923792e-05,
      "loss": 2.9903,
      "step": 518000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.698228120803833,
      "eval_runtime": 109.726,
      "eval_samples_per_second": 91.136,
      "eval_steps_per_second": 5.696,
      "step": 518000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.344220300992636e-05,
      "loss": 2.9587,
      "step": 518100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3439000960614796e-05,
      "loss": 2.9577,
      "step": 518200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3435798911303236e-05,
      "loss": 2.975,
      "step": 518300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3432596861991676e-05,
      "loss": 2.9641,
      "step": 518400
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3429394812680116e-05,
      "loss": 2.9757,
      "step": 518500
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3426192763368555e-05,
      "loss": 2.9924,
      "step": 518600
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3422990714056995e-05,
      "loss": 2.97,
      "step": 518700
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.341978866474544e-05,
      "loss": 2.977,
      "step": 518800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.341658661543388e-05,
      "loss": 2.959,
      "step": 518900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.341338456612232e-05,
      "loss": 2.9822,
      "step": 519000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.6972148418426514,
      "eval_runtime": 110.0131,
      "eval_samples_per_second": 90.898,
      "eval_steps_per_second": 5.681,
      "step": 519000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.341018251681076e-05,
      "loss": 2.9732,
      "step": 519100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.34069804674992e-05,
      "loss": 2.9786,
      "step": 519200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.340377841818764e-05,
      "loss": 3.0128,
      "step": 519300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.340057636887608e-05,
      "loss": 2.9947,
      "step": 519400
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.339737431956452e-05,
      "loss": 2.955,
      "step": 519500
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3394172270252966e-05,
      "loss": 2.9495,
      "step": 519600
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3390970220941406e-05,
      "loss": 2.9876,
      "step": 519700
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3387768171629846e-05,
      "loss": 2.985,
      "step": 519800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3384566122318286e-05,
      "loss": 2.9982,
      "step": 519900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3381364073006725e-05,
      "loss": 2.987,
      "step": 520000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.6981844902038574,
      "eval_runtime": 109.5904,
      "eval_samples_per_second": 91.249,
      "eval_steps_per_second": 5.703,
      "step": 520000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3378162023695165e-05,
      "loss": 2.9964,
      "step": 520100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3374959974383605e-05,
      "loss": 2.9822,
      "step": 520200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.337175792507205e-05,
      "loss": 2.9773,
      "step": 520300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.336855587576049e-05,
      "loss": 2.9694,
      "step": 520400
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.336535382644893e-05,
      "loss": 2.9796,
      "step": 520500
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.336215177713737e-05,
      "loss": 2.954,
      "step": 520600
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.335894972782581e-05,
      "loss": 2.9848,
      "step": 520700
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.335574767851425e-05,
      "loss": 2.964,
      "step": 520800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.335254562920269e-05,
      "loss": 2.9518,
      "step": 520900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3349343579891136e-05,
      "loss": 2.9955,
      "step": 521000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.7010953426361084,
      "eval_runtime": 109.1903,
      "eval_samples_per_second": 91.583,
      "eval_steps_per_second": 5.724,
      "step": 521000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.334614153057957e-05,
      "loss": 2.9789,
      "step": 521100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3342939481268016e-05,
      "loss": 2.9529,
      "step": 521200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3339737431956455e-05,
      "loss": 2.9581,
      "step": 521300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3336535382644895e-05,
      "loss": 3.004,
      "step": 521400
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 2.9693,
      "step": 521500
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3330131284021775e-05,
      "loss": 2.9777,
      "step": 521600
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3326929234710214e-05,
      "loss": 2.9751,
      "step": 521700
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3323727185398654e-05,
      "loss": 2.9608,
      "step": 521800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.33205251360871e-05,
      "loss": 2.9883,
      "step": 521900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.331732308677554e-05,
      "loss": 3.0048,
      "step": 522000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.6976935863494873,
      "eval_runtime": 109.6306,
      "eval_samples_per_second": 91.215,
      "eval_steps_per_second": 5.701,
      "step": 522000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.331412103746398e-05,
      "loss": 2.934,
      "step": 522100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.331091898815242e-05,
      "loss": 2.9661,
      "step": 522200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.330771693884086e-05,
      "loss": 2.9659,
      "step": 522300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.33045148895293e-05,
      "loss": 2.9789,
      "step": 522400
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.330131284021774e-05,
      "loss": 2.9719,
      "step": 522500
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3298110790906186e-05,
      "loss": 2.9732,
      "step": 522600
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.329490874159462e-05,
      "loss": 2.962,
      "step": 522700
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3291706692283065e-05,
      "loss": 3.003,
      "step": 522800
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3288504642971505e-05,
      "loss": 2.9868,
      "step": 522900
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3285302593659945e-05,
      "loss": 2.9807,
      "step": 523000
    },
    {
      "epoch": 0.33,
      "eval_loss": 2.694972038269043,
      "eval_runtime": 109.5795,
      "eval_samples_per_second": 91.258,
      "eval_steps_per_second": 5.704,
      "step": 523000
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3282100544348384e-05,
      "loss": 2.9916,
      "step": 523100
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3278898495036824e-05,
      "loss": 2.9595,
      "step": 523200
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.327569644572527e-05,
      "loss": 2.9506,
      "step": 523300
    },
    {
      "epoch": 0.33,
      "learning_rate": 3.3272494396413704e-05,
      "loss": 2.9667,
      "step": 523400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.326929234710215e-05,
      "loss": 2.9631,
      "step": 523500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.326609029779059e-05,
      "loss": 2.9863,
      "step": 523600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.326288824847903e-05,
      "loss": 2.9561,
      "step": 523700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.325968619916747e-05,
      "loss": 2.9795,
      "step": 523800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.325648414985591e-05,
      "loss": 2.9588,
      "step": 523900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.325328210054435e-05,
      "loss": 2.9746,
      "step": 524000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.6947405338287354,
      "eval_runtime": 109.4498,
      "eval_samples_per_second": 91.366,
      "eval_steps_per_second": 5.71,
      "step": 524000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.325008005123279e-05,
      "loss": 2.997,
      "step": 524100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3246878001921235e-05,
      "loss": 2.9723,
      "step": 524200
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.324367595260967e-05,
      "loss": 2.9993,
      "step": 524300
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3240473903298114e-05,
      "loss": 2.9912,
      "step": 524400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3237271853986554e-05,
      "loss": 2.9946,
      "step": 524500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3234069804674994e-05,
      "loss": 3.0143,
      "step": 524600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3230867755363434e-05,
      "loss": 2.9809,
      "step": 524700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3227665706051873e-05,
      "loss": 2.9868,
      "step": 524800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.322446365674032e-05,
      "loss": 2.9678,
      "step": 524900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.322126160742875e-05,
      "loss": 2.9782,
      "step": 525000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.6983931064605713,
      "eval_runtime": 109.5203,
      "eval_samples_per_second": 91.307,
      "eval_steps_per_second": 5.707,
      "step": 525000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.32180595581172e-05,
      "loss": 2.9988,
      "step": 525100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.321485750880563e-05,
      "loss": 2.9495,
      "step": 525200
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.321165545949408e-05,
      "loss": 2.9928,
      "step": 525300
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.320845341018252e-05,
      "loss": 2.9927,
      "step": 525400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.320525136087096e-05,
      "loss": 2.9757,
      "step": 525500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3202049311559405e-05,
      "loss": 2.9975,
      "step": 525600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.319884726224784e-05,
      "loss": 2.9613,
      "step": 525700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3195645212936284e-05,
      "loss": 2.9633,
      "step": 525800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.319244316362472e-05,
      "loss": 2.9614,
      "step": 525900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3189241114313164e-05,
      "loss": 2.9717,
      "step": 526000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.698443651199341,
      "eval_runtime": 109.7385,
      "eval_samples_per_second": 91.126,
      "eval_steps_per_second": 5.695,
      "step": 526000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3186039065001604e-05,
      "loss": 2.9741,
      "step": 526100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.318283701569004e-05,
      "loss": 2.9623,
      "step": 526200
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.317963496637848e-05,
      "loss": 2.9686,
      "step": 526300
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.317643291706692e-05,
      "loss": 2.9451,
      "step": 526400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.317323086775537e-05,
      "loss": 2.9714,
      "step": 526500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.31700288184438e-05,
      "loss": 2.9565,
      "step": 526600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.316682676913225e-05,
      "loss": 2.9482,
      "step": 526700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.316362471982068e-05,
      "loss": 2.9679,
      "step": 526800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.316042267050913e-05,
      "loss": 2.9732,
      "step": 526900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.315722062119757e-05,
      "loss": 2.9626,
      "step": 527000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.69562029838562,
      "eval_runtime": 131.8429,
      "eval_samples_per_second": 75.848,
      "eval_steps_per_second": 4.74,
      "step": 527000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.315401857188601e-05,
      "loss": 2.9673,
      "step": 527100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3150816522574454e-05,
      "loss": 2.9643,
      "step": 527200
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.314761447326289e-05,
      "loss": 2.9728,
      "step": 527300
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3144412423951334e-05,
      "loss": 2.9393,
      "step": 527400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.314121037463977e-05,
      "loss": 2.9767,
      "step": 527500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.313800832532821e-05,
      "loss": 2.9556,
      "step": 527600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.313480627601665e-05,
      "loss": 2.958,
      "step": 527700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.313160422670509e-05,
      "loss": 2.9656,
      "step": 527800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.312840217739354e-05,
      "loss": 2.9757,
      "step": 527900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.312520012808197e-05,
      "loss": 2.9585,
      "step": 528000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.6967854499816895,
      "eval_runtime": 109.7801,
      "eval_samples_per_second": 91.091,
      "eval_steps_per_second": 5.693,
      "step": 528000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.312199807877042e-05,
      "loss": 2.9744,
      "step": 528100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.311879602945885e-05,
      "loss": 2.9702,
      "step": 528200
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.31155939801473e-05,
      "loss": 2.9807,
      "step": 528300
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.311239193083573e-05,
      "loss": 2.9583,
      "step": 528400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.310918988152418e-05,
      "loss": 2.9729,
      "step": 528500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.310598783221262e-05,
      "loss": 2.9725,
      "step": 528600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.310278578290106e-05,
      "loss": 2.9622,
      "step": 528700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3099583733589504e-05,
      "loss": 2.9763,
      "step": 528800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3096381684277937e-05,
      "loss": 2.9828,
      "step": 528900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.309317963496638e-05,
      "loss": 2.9784,
      "step": 529000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.695707321166992,
      "eval_runtime": 109.6705,
      "eval_samples_per_second": 91.182,
      "eval_steps_per_second": 5.699,
      "step": 529000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3089977585654816e-05,
      "loss": 2.9839,
      "step": 529100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.308677553634326e-05,
      "loss": 2.9712,
      "step": 529200
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.30835734870317e-05,
      "loss": 3.0006,
      "step": 529300
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.308037143772014e-05,
      "loss": 2.95,
      "step": 529400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.307716938840859e-05,
      "loss": 2.9845,
      "step": 529500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.307396733909702e-05,
      "loss": 2.965,
      "step": 529600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.307076528978547e-05,
      "loss": 2.9842,
      "step": 529700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.30675632404739e-05,
      "loss": 2.9617,
      "step": 529800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.306436119116235e-05,
      "loss": 2.9726,
      "step": 529900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.306115914185078e-05,
      "loss": 2.9985,
      "step": 530000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.693387508392334,
      "eval_runtime": 129.2437,
      "eval_samples_per_second": 77.373,
      "eval_steps_per_second": 4.836,
      "step": 530000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.305795709253923e-05,
      "loss": 2.953,
      "step": 530100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.305475504322767e-05,
      "loss": 2.9757,
      "step": 530200
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3051552993916106e-05,
      "loss": 3.004,
      "step": 530300
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.304835094460455e-05,
      "loss": 2.9644,
      "step": 530400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3045148895292986e-05,
      "loss": 2.9457,
      "step": 530500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.304194684598143e-05,
      "loss": 2.9808,
      "step": 530600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3038744796669865e-05,
      "loss": 2.9695,
      "step": 530700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.303554274735831e-05,
      "loss": 2.9417,
      "step": 530800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.303234069804675e-05,
      "loss": 2.9536,
      "step": 530900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.302913864873519e-05,
      "loss": 2.982,
      "step": 531000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.692915201187134,
      "eval_runtime": 109.1868,
      "eval_samples_per_second": 91.586,
      "eval_steps_per_second": 5.724,
      "step": 531000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.302593659942364e-05,
      "loss": 2.9724,
      "step": 531100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.302273455011207e-05,
      "loss": 2.9757,
      "step": 531200
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.301953250080052e-05,
      "loss": 2.9851,
      "step": 531300
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.301633045148895e-05,
      "loss": 2.9541,
      "step": 531400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.30131284021774e-05,
      "loss": 2.9637,
      "step": 531500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.300992635286583e-05,
      "loss": 2.9672,
      "step": 531600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3006724303554276e-05,
      "loss": 2.9718,
      "step": 531700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3003522254242716e-05,
      "loss": 2.9794,
      "step": 531800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.3000320204931156e-05,
      "loss": 2.9433,
      "step": 531900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.29971181556196e-05,
      "loss": 2.9801,
      "step": 532000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.695091485977173,
      "eval_runtime": 109.1525,
      "eval_samples_per_second": 91.615,
      "eval_steps_per_second": 5.726,
      "step": 532000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2993916106308035e-05,
      "loss": 2.9668,
      "step": 532100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.299071405699648e-05,
      "loss": 2.9824,
      "step": 532200
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2987512007684915e-05,
      "loss": 2.967,
      "step": 532300
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.298430995837336e-05,
      "loss": 2.9582,
      "step": 532400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.29811079090618e-05,
      "loss": 2.9635,
      "step": 532500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.297790585975024e-05,
      "loss": 2.9467,
      "step": 532600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.297470381043869e-05,
      "loss": 2.9704,
      "step": 532700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.297150176112712e-05,
      "loss": 2.9919,
      "step": 532800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.296829971181557e-05,
      "loss": 2.9823,
      "step": 532900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2965097662504e-05,
      "loss": 2.9735,
      "step": 533000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.6941463947296143,
      "eval_runtime": 130.6278,
      "eval_samples_per_second": 76.553,
      "eval_steps_per_second": 4.785,
      "step": 533000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2961895613192446e-05,
      "loss": 2.9828,
      "step": 533100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2958693563880886e-05,
      "loss": 2.99,
      "step": 533200
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2955491514569326e-05,
      "loss": 2.9582,
      "step": 533300
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2952289465257765e-05,
      "loss": 2.9753,
      "step": 533400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2949087415946205e-05,
      "loss": 2.9702,
      "step": 533500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.294588536663465e-05,
      "loss": 2.9795,
      "step": 533600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2942683317323085e-05,
      "loss": 2.9584,
      "step": 533700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.293948126801153e-05,
      "loss": 2.9537,
      "step": 533800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2936279218699964e-05,
      "loss": 2.9534,
      "step": 533900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.293307716938841e-05,
      "loss": 2.9644,
      "step": 534000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.6947808265686035,
      "eval_runtime": 109.116,
      "eval_samples_per_second": 91.646,
      "eval_steps_per_second": 5.728,
      "step": 534000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.292987512007685e-05,
      "loss": 2.941,
      "step": 534100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.292667307076529e-05,
      "loss": 2.965,
      "step": 534200
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.292347102145374e-05,
      "loss": 2.9698,
      "step": 534300
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.292026897214217e-05,
      "loss": 2.9825,
      "step": 534400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2917066922830616e-05,
      "loss": 2.993,
      "step": 534500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.291386487351905e-05,
      "loss": 2.9857,
      "step": 534600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2910662824207496e-05,
      "loss": 2.9598,
      "step": 534700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2907460774895935e-05,
      "loss": 2.9837,
      "step": 534800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2904258725584375e-05,
      "loss": 2.9807,
      "step": 534900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2901056676272815e-05,
      "loss": 2.9353,
      "step": 535000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.695117235183716,
      "eval_runtime": 110.5498,
      "eval_samples_per_second": 90.457,
      "eval_steps_per_second": 5.654,
      "step": 535000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2897854626961255e-05,
      "loss": 2.9596,
      "step": 535100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.28946525776497e-05,
      "loss": 2.9586,
      "step": 535200
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2891450528338134e-05,
      "loss": 2.9492,
      "step": 535300
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.288824847902658e-05,
      "loss": 2.9794,
      "step": 535400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.288504642971502e-05,
      "loss": 2.9707,
      "step": 535500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.288184438040346e-05,
      "loss": 2.9575,
      "step": 535600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.28786423310919e-05,
      "loss": 2.9512,
      "step": 535700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.287544028178034e-05,
      "loss": 2.9482,
      "step": 535800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2872238232468786e-05,
      "loss": 2.9666,
      "step": 535900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.286903618315722e-05,
      "loss": 2.9507,
      "step": 536000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.69309663772583,
      "eval_runtime": 131.1239,
      "eval_samples_per_second": 76.264,
      "eval_steps_per_second": 4.766,
      "step": 536000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2865834133845666e-05,
      "loss": 2.9316,
      "step": 536100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.28626320845341e-05,
      "loss": 2.9652,
      "step": 536200
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2859430035222545e-05,
      "loss": 2.9701,
      "step": 536300
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2856227985910985e-05,
      "loss": 2.9406,
      "step": 536400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2853025936599424e-05,
      "loss": 2.9597,
      "step": 536500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2849823887287864e-05,
      "loss": 2.9735,
      "step": 536600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2846621837976304e-05,
      "loss": 2.9716,
      "step": 536700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.284341978866475e-05,
      "loss": 2.9718,
      "step": 536800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2840217739353183e-05,
      "loss": 2.9605,
      "step": 536900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.283701569004163e-05,
      "loss": 2.961,
      "step": 537000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.6954832077026367,
      "eval_runtime": 109.5677,
      "eval_samples_per_second": 91.268,
      "eval_steps_per_second": 5.704,
      "step": 537000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.283381364073007e-05,
      "loss": 2.9633,
      "step": 537100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.283061159141851e-05,
      "loss": 2.9374,
      "step": 537200
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.282740954210695e-05,
      "loss": 2.9701,
      "step": 537300
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.282420749279539e-05,
      "loss": 2.9606,
      "step": 537400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2821005443483835e-05,
      "loss": 2.963,
      "step": 537500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.281780339417227e-05,
      "loss": 2.9549,
      "step": 537600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2814601344860715e-05,
      "loss": 2.9769,
      "step": 537700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2811399295549155e-05,
      "loss": 2.9688,
      "step": 537800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2808197246237594e-05,
      "loss": 2.9848,
      "step": 537900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2804995196926034e-05,
      "loss": 2.9728,
      "step": 538000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.6925764083862305,
      "eval_runtime": 109.1103,
      "eval_samples_per_second": 91.65,
      "eval_steps_per_second": 5.728,
      "step": 538000
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2801793147614474e-05,
      "loss": 2.9323,
      "step": 538100
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2798591098302914e-05,
      "loss": 2.9605,
      "step": 538200
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.279538904899135e-05,
      "loss": 2.9317,
      "step": 538300
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.27921869996798e-05,
      "loss": 2.9554,
      "step": 538400
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.278898495036823e-05,
      "loss": 2.9384,
      "step": 538500
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.278578290105668e-05,
      "loss": 2.9703,
      "step": 538600
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.278258085174512e-05,
      "loss": 2.9987,
      "step": 538700
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.277937880243356e-05,
      "loss": 2.975,
      "step": 538800
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.2776176753122e-05,
      "loss": 2.9772,
      "step": 538900
    },
    {
      "epoch": 0.34,
      "learning_rate": 3.277297470381044e-05,
      "loss": 2.9764,
      "step": 539000
    },
    {
      "epoch": 0.34,
      "eval_loss": 2.693347692489624,
      "eval_runtime": 131.1359,
      "eval_samples_per_second": 76.257,
      "eval_steps_per_second": 4.766,
      "step": 539000
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2769772654498885e-05,
      "loss": 2.9744,
      "step": 539100
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.276657060518732e-05,
      "loss": 2.9776,
      "step": 539200
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2763368555875764e-05,
      "loss": 2.9884,
      "step": 539300
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2760166506564204e-05,
      "loss": 2.98,
      "step": 539400
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2756964457252644e-05,
      "loss": 2.9643,
      "step": 539500
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2753762407941083e-05,
      "loss": 2.947,
      "step": 539600
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.275056035862952e-05,
      "loss": 2.9673,
      "step": 539700
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.274735830931796e-05,
      "loss": 2.9674,
      "step": 539800
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.27441562600064e-05,
      "loss": 3.0046,
      "step": 539900
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.274095421069485e-05,
      "loss": 2.9869,
      "step": 540000
    },
    {
      "epoch": 0.35,
      "eval_loss": 2.695218324661255,
      "eval_runtime": 109.0991,
      "eval_samples_per_second": 91.66,
      "eval_steps_per_second": 5.729,
      "step": 540000
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.273775216138329e-05,
      "loss": 2.9408,
      "step": 540100
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.273455011207173e-05,
      "loss": 2.9647,
      "step": 540200
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.273134806276017e-05,
      "loss": 2.9328,
      "step": 540300
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.272814601344861e-05,
      "loss": 2.9465,
      "step": 540400
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.272494396413705e-05,
      "loss": 2.9742,
      "step": 540500
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.272174191482549e-05,
      "loss": 2.9679,
      "step": 540600
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2718539865513934e-05,
      "loss": 2.934,
      "step": 540700
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2715337816202374e-05,
      "loss": 2.9514,
      "step": 540800
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2712135766890814e-05,
      "loss": 2.9642,
      "step": 540900
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.270893371757925e-05,
      "loss": 2.9615,
      "step": 541000
    },
    {
      "epoch": 0.35,
      "eval_loss": 2.695218563079834,
      "eval_runtime": 109.1836,
      "eval_samples_per_second": 91.589,
      "eval_steps_per_second": 5.724,
      "step": 541000
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.270573166826769e-05,
      "loss": 2.9569,
      "step": 541100
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.270252961895613e-05,
      "loss": 2.958,
      "step": 541200
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.269932756964457e-05,
      "loss": 2.9586,
      "step": 541300
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.269612552033301e-05,
      "loss": 2.9739,
      "step": 541400
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.269292347102145e-05,
      "loss": 2.9453,
      "step": 541500
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.26897214217099e-05,
      "loss": 2.9827,
      "step": 541600
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.268651937239834e-05,
      "loss": 2.9677,
      "step": 541700
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.268331732308678e-05,
      "loss": 2.9882,
      "step": 541800
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.268011527377522e-05,
      "loss": 2.9552,
      "step": 541900
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.267691322446366e-05,
      "loss": 3.0004,
      "step": 542000
    },
    {
      "epoch": 0.35,
      "eval_loss": 2.6946334838867188,
      "eval_runtime": 132.0277,
      "eval_samples_per_second": 75.742,
      "eval_steps_per_second": 4.734,
      "step": 542000
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.26737111751521e-05,
      "loss": 2.9615,
      "step": 542100
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.267050912584054e-05,
      "loss": 2.9704,
      "step": 542200
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.266730707652898e-05,
      "loss": 2.993,
      "step": 542300
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.266410502721742e-05,
      "loss": 2.9545,
      "step": 542400
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.266090297790586e-05,
      "loss": 2.9547,
      "step": 542500
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.26577009285943e-05,
      "loss": 2.9553,
      "step": 542600
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.265449887928274e-05,
      "loss": 2.9912,
      "step": 542700
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.265129682997118e-05,
      "loss": 2.9798,
      "step": 542800
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.264809478065962e-05,
      "loss": 2.9617,
      "step": 542900
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.264489273134806e-05,
      "loss": 2.9855,
      "step": 543000
    },
    {
      "epoch": 0.35,
      "eval_loss": 2.694458484649658,
      "eval_runtime": 109.5883,
      "eval_samples_per_second": 91.251,
      "eval_steps_per_second": 5.703,
      "step": 543000
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.264169068203651e-05,
      "loss": 2.9731,
      "step": 543100
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.263848863272495e-05,
      "loss": 3.0042,
      "step": 543200
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.263528658341339e-05,
      "loss": 2.9567,
      "step": 543300
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.263208453410183e-05,
      "loss": 2.9901,
      "step": 543400
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.262888248479027e-05,
      "loss": 2.9746,
      "step": 543500
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.262568043547871e-05,
      "loss": 2.9767,
      "step": 543600
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.262247838616715e-05,
      "loss": 2.9701,
      "step": 543700
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2619276336855586e-05,
      "loss": 2.9619,
      "step": 543800
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2616074287544026e-05,
      "loss": 2.943,
      "step": 543900
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.261287223823247e-05,
      "loss": 2.974,
      "step": 544000
    },
    {
      "epoch": 0.35,
      "eval_loss": 2.692997694015503,
      "eval_runtime": 109.612,
      "eval_samples_per_second": 91.231,
      "eval_steps_per_second": 5.702,
      "step": 544000
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.260967018892091e-05,
      "loss": 2.9419,
      "step": 544100
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.260646813960935e-05,
      "loss": 2.9611,
      "step": 544200
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.260326609029779e-05,
      "loss": 2.9771,
      "step": 544300
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.260006404098623e-05,
      "loss": 2.9827,
      "step": 544400
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.259686199167467e-05,
      "loss": 2.9875,
      "step": 544500
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.259365994236311e-05,
      "loss": 2.9663,
      "step": 544600
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.259045789305156e-05,
      "loss": 2.972,
      "step": 544700
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.258725584374e-05,
      "loss": 2.978,
      "step": 544800
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.258405379442844e-05,
      "loss": 2.971,
      "step": 544900
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.258085174511688e-05,
      "loss": 2.9637,
      "step": 545000
    },
    {
      "epoch": 0.35,
      "eval_loss": 2.692976474761963,
      "eval_runtime": 130.3749,
      "eval_samples_per_second": 76.702,
      "eval_steps_per_second": 4.794,
      "step": 545000
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2577649695805317e-05,
      "loss": 2.9946,
      "step": 545100
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2574447646493756e-05,
      "loss": 2.9915,
      "step": 545200
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2571245597182196e-05,
      "loss": 2.9862,
      "step": 545300
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.256804354787064e-05,
      "loss": 2.953,
      "step": 545400
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2564841498559076e-05,
      "loss": 2.9549,
      "step": 545500
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.256163944924752e-05,
      "loss": 2.9833,
      "step": 545600
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.255843739993596e-05,
      "loss": 2.9536,
      "step": 545700
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.25552353506244e-05,
      "loss": 2.9726,
      "step": 545800
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.255203330131284e-05,
      "loss": 2.9401,
      "step": 545900
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.254883125200128e-05,
      "loss": 2.9551,
      "step": 546000
    },
    {
      "epoch": 0.35,
      "eval_loss": 2.6912426948547363,
      "eval_runtime": 109.4798,
      "eval_samples_per_second": 91.341,
      "eval_steps_per_second": 5.709,
      "step": 546000
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.254562920268972e-05,
      "loss": 2.9715,
      "step": 546100
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.254242715337816e-05,
      "loss": 2.9471,
      "step": 546200
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.253922510406661e-05,
      "loss": 2.9535,
      "step": 546300
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.253602305475505e-05,
      "loss": 2.9671,
      "step": 546400
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2532821005443486e-05,
      "loss": 2.9927,
      "step": 546500
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2529618956131926e-05,
      "loss": 2.9668,
      "step": 546600
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2526416906820366e-05,
      "loss": 2.9495,
      "step": 546700
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2523214857508806e-05,
      "loss": 2.969,
      "step": 546800
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.2520012808197245e-05,
      "loss": 2.964,
      "step": 546900
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.251681075888569e-05,
      "loss": 3.0009,
      "step": 547000
    },
    {
      "epoch": 0.35,
      "eval_loss": 2.6918511390686035,
      "eval_runtime": 109.4168,
      "eval_samples_per_second": 91.394,
      "eval_steps_per_second": 5.712,
      "step": 547000
    }
  ],
  "max_steps": 1562500,
  "num_train_epochs": 1,
  "total_flos": 4.880403537587995e+18,
  "trial_name": null,
  "trial_params": null
}