cendol-llama2-7b-inst / trainer_state.json
afaji's picture
add model
472cc56
raw
history blame
176 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.986666666666667,
"eval_steps": 10000,
"global_step": 140000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.99862042482382e-05,
"loss": 1.1884,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 1.9971981823741495e-05,
"loss": 1.0269,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 1.995775939924479e-05,
"loss": 0.9881,
"step": 300
},
{
"epoch": 0.01,
"learning_rate": 1.994353697474809e-05,
"loss": 0.9348,
"step": 400
},
{
"epoch": 0.01,
"learning_rate": 1.9929314550251384e-05,
"loss": 0.9291,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 1.991509212575468e-05,
"loss": 0.8964,
"step": 600
},
{
"epoch": 0.01,
"learning_rate": 1.9900869701257974e-05,
"loss": 0.8579,
"step": 700
},
{
"epoch": 0.02,
"learning_rate": 1.9886647276761272e-05,
"loss": 0.8678,
"step": 800
},
{
"epoch": 0.02,
"learning_rate": 1.9872424852264567e-05,
"loss": 0.8506,
"step": 900
},
{
"epoch": 0.02,
"learning_rate": 1.9858202427767862e-05,
"loss": 0.8268,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 1.984398000327116e-05,
"loss": 0.8056,
"step": 1100
},
{
"epoch": 0.03,
"learning_rate": 1.9829757578774455e-05,
"loss": 0.7876,
"step": 1200
},
{
"epoch": 0.03,
"learning_rate": 1.981553515427775e-05,
"loss": 0.7956,
"step": 1300
},
{
"epoch": 0.03,
"learning_rate": 1.980131272978105e-05,
"loss": 0.7904,
"step": 1400
},
{
"epoch": 0.03,
"learning_rate": 1.9787090305284344e-05,
"loss": 0.7707,
"step": 1500
},
{
"epoch": 0.03,
"learning_rate": 1.977286788078764e-05,
"loss": 0.7652,
"step": 1600
},
{
"epoch": 0.04,
"learning_rate": 1.9758645456290934e-05,
"loss": 0.7798,
"step": 1700
},
{
"epoch": 0.04,
"learning_rate": 1.9744423031794232e-05,
"loss": 0.7454,
"step": 1800
},
{
"epoch": 0.04,
"learning_rate": 1.9730200607297527e-05,
"loss": 0.7506,
"step": 1900
},
{
"epoch": 0.04,
"learning_rate": 1.9715978182800822e-05,
"loss": 0.727,
"step": 2000
},
{
"epoch": 0.04,
"learning_rate": 1.970175575830412e-05,
"loss": 0.714,
"step": 2100
},
{
"epoch": 0.05,
"learning_rate": 1.9687533333807415e-05,
"loss": 0.7132,
"step": 2200
},
{
"epoch": 0.05,
"learning_rate": 1.9673310909310714e-05,
"loss": 0.7088,
"step": 2300
},
{
"epoch": 0.05,
"learning_rate": 1.965908848481401e-05,
"loss": 0.7266,
"step": 2400
},
{
"epoch": 0.05,
"learning_rate": 1.9644866060317303e-05,
"loss": 0.7031,
"step": 2500
},
{
"epoch": 0.06,
"learning_rate": 1.9630643635820602e-05,
"loss": 0.6948,
"step": 2600
},
{
"epoch": 0.06,
"learning_rate": 1.9616421211323893e-05,
"loss": 0.7048,
"step": 2700
},
{
"epoch": 0.06,
"learning_rate": 1.9602198786827192e-05,
"loss": 0.699,
"step": 2800
},
{
"epoch": 0.06,
"learning_rate": 1.9587976362330487e-05,
"loss": 0.679,
"step": 2900
},
{
"epoch": 0.06,
"learning_rate": 1.9573753937833785e-05,
"loss": 0.6847,
"step": 3000
},
{
"epoch": 0.07,
"learning_rate": 1.955953151333708e-05,
"loss": 0.6781,
"step": 3100
},
{
"epoch": 0.07,
"learning_rate": 1.9545309088840375e-05,
"loss": 0.6803,
"step": 3200
},
{
"epoch": 0.07,
"learning_rate": 1.9531086664343673e-05,
"loss": 0.6594,
"step": 3300
},
{
"epoch": 0.07,
"learning_rate": 1.951686423984697e-05,
"loss": 0.6729,
"step": 3400
},
{
"epoch": 0.07,
"learning_rate": 1.9502641815350263e-05,
"loss": 0.664,
"step": 3500
},
{
"epoch": 0.08,
"learning_rate": 1.9488419390853562e-05,
"loss": 0.6611,
"step": 3600
},
{
"epoch": 0.08,
"learning_rate": 1.9474196966356857e-05,
"loss": 0.6497,
"step": 3700
},
{
"epoch": 0.08,
"learning_rate": 1.945997454186015e-05,
"loss": 0.65,
"step": 3800
},
{
"epoch": 0.08,
"learning_rate": 1.9445752117363447e-05,
"loss": 0.647,
"step": 3900
},
{
"epoch": 0.09,
"learning_rate": 1.9431529692866745e-05,
"loss": 0.6528,
"step": 4000
},
{
"epoch": 0.09,
"learning_rate": 1.941730726837004e-05,
"loss": 0.6432,
"step": 4100
},
{
"epoch": 0.09,
"learning_rate": 1.9403084843873335e-05,
"loss": 0.6291,
"step": 4200
},
{
"epoch": 0.09,
"learning_rate": 1.9388862419376633e-05,
"loss": 0.6388,
"step": 4300
},
{
"epoch": 0.09,
"learning_rate": 1.937463999487993e-05,
"loss": 0.6261,
"step": 4400
},
{
"epoch": 0.1,
"learning_rate": 1.9360417570383227e-05,
"loss": 0.642,
"step": 4500
},
{
"epoch": 0.1,
"learning_rate": 1.934619514588652e-05,
"loss": 0.6399,
"step": 4600
},
{
"epoch": 0.1,
"learning_rate": 1.9331972721389817e-05,
"loss": 0.6213,
"step": 4700
},
{
"epoch": 0.1,
"learning_rate": 1.931775029689311e-05,
"loss": 0.6163,
"step": 4800
},
{
"epoch": 0.1,
"learning_rate": 1.9303527872396407e-05,
"loss": 0.6167,
"step": 4900
},
{
"epoch": 0.11,
"learning_rate": 1.9289305447899705e-05,
"loss": 0.6127,
"step": 5000
},
{
"epoch": 0.11,
"eval_loss": 0.6764795780181885,
"eval_runtime": 34.8863,
"eval_samples_per_second": 143.323,
"eval_steps_per_second": 1.147,
"step": 5000
},
{
"epoch": 0.11,
"learning_rate": 1.9275083023403e-05,
"loss": 0.5963,
"step": 5100
},
{
"epoch": 0.11,
"learning_rate": 1.9260860598906298e-05,
"loss": 0.6034,
"step": 5200
},
{
"epoch": 0.11,
"learning_rate": 1.9246638174409593e-05,
"loss": 0.5944,
"step": 5300
},
{
"epoch": 0.12,
"learning_rate": 1.9232415749912888e-05,
"loss": 0.5951,
"step": 5400
},
{
"epoch": 0.12,
"learning_rate": 1.9218193325416187e-05,
"loss": 0.6055,
"step": 5500
},
{
"epoch": 0.12,
"learning_rate": 1.920397090091948e-05,
"loss": 0.6052,
"step": 5600
},
{
"epoch": 0.12,
"learning_rate": 1.9189748476422777e-05,
"loss": 0.5907,
"step": 5700
},
{
"epoch": 0.12,
"learning_rate": 1.9175526051926075e-05,
"loss": 0.5933,
"step": 5800
},
{
"epoch": 0.13,
"learning_rate": 1.916130362742937e-05,
"loss": 0.6002,
"step": 5900
},
{
"epoch": 0.13,
"learning_rate": 1.9147081202932665e-05,
"loss": 0.5985,
"step": 6000
},
{
"epoch": 0.13,
"learning_rate": 1.913285877843596e-05,
"loss": 0.5761,
"step": 6100
},
{
"epoch": 0.13,
"learning_rate": 1.9118636353939258e-05,
"loss": 0.5749,
"step": 6200
},
{
"epoch": 0.13,
"learning_rate": 1.9104413929442553e-05,
"loss": 0.5855,
"step": 6300
},
{
"epoch": 0.14,
"learning_rate": 1.9090191504945848e-05,
"loss": 0.5724,
"step": 6400
},
{
"epoch": 0.14,
"learning_rate": 1.9075969080449146e-05,
"loss": 0.5856,
"step": 6500
},
{
"epoch": 0.14,
"learning_rate": 1.906174665595244e-05,
"loss": 0.5843,
"step": 6600
},
{
"epoch": 0.14,
"learning_rate": 1.904752423145574e-05,
"loss": 0.5624,
"step": 6700
},
{
"epoch": 0.15,
"learning_rate": 1.9033301806959035e-05,
"loss": 0.5784,
"step": 6800
},
{
"epoch": 0.15,
"learning_rate": 1.901907938246233e-05,
"loss": 0.5589,
"step": 6900
},
{
"epoch": 0.15,
"learning_rate": 1.9004856957965625e-05,
"loss": 0.5732,
"step": 7000
},
{
"epoch": 0.15,
"learning_rate": 1.899063453346892e-05,
"loss": 0.5529,
"step": 7100
},
{
"epoch": 0.15,
"learning_rate": 1.8976412108972218e-05,
"loss": 0.5648,
"step": 7200
},
{
"epoch": 0.16,
"learning_rate": 1.8962189684475513e-05,
"loss": 0.5535,
"step": 7300
},
{
"epoch": 0.16,
"learning_rate": 1.894796725997881e-05,
"loss": 0.5527,
"step": 7400
},
{
"epoch": 0.16,
"learning_rate": 1.8933744835482106e-05,
"loss": 0.5478,
"step": 7500
},
{
"epoch": 0.16,
"learning_rate": 1.89195224109854e-05,
"loss": 0.5419,
"step": 7600
},
{
"epoch": 0.16,
"learning_rate": 1.89052999864887e-05,
"loss": 0.5596,
"step": 7700
},
{
"epoch": 0.17,
"learning_rate": 1.8891077561991995e-05,
"loss": 0.5444,
"step": 7800
},
{
"epoch": 0.17,
"learning_rate": 1.887685513749529e-05,
"loss": 0.5526,
"step": 7900
},
{
"epoch": 0.17,
"learning_rate": 1.8862632712998585e-05,
"loss": 0.5343,
"step": 8000
},
{
"epoch": 0.17,
"learning_rate": 1.8848410288501883e-05,
"loss": 0.5327,
"step": 8100
},
{
"epoch": 0.17,
"learning_rate": 1.8834187864005178e-05,
"loss": 0.5289,
"step": 8200
},
{
"epoch": 0.18,
"learning_rate": 1.8819965439508473e-05,
"loss": 0.5467,
"step": 8300
},
{
"epoch": 0.18,
"learning_rate": 1.880574301501177e-05,
"loss": 0.5357,
"step": 8400
},
{
"epoch": 0.18,
"learning_rate": 1.8791520590515066e-05,
"loss": 0.5317,
"step": 8500
},
{
"epoch": 0.18,
"learning_rate": 1.877729816601836e-05,
"loss": 0.5326,
"step": 8600
},
{
"epoch": 0.19,
"learning_rate": 1.876307574152166e-05,
"loss": 0.5335,
"step": 8700
},
{
"epoch": 0.19,
"learning_rate": 1.8748853317024955e-05,
"loss": 0.5364,
"step": 8800
},
{
"epoch": 0.19,
"learning_rate": 1.8734630892528253e-05,
"loss": 0.5295,
"step": 8900
},
{
"epoch": 0.19,
"learning_rate": 1.8720408468031548e-05,
"loss": 0.53,
"step": 9000
},
{
"epoch": 0.19,
"learning_rate": 1.8706186043534843e-05,
"loss": 0.5325,
"step": 9100
},
{
"epoch": 0.2,
"learning_rate": 1.8691963619038138e-05,
"loss": 0.5196,
"step": 9200
},
{
"epoch": 0.2,
"learning_rate": 1.8677741194541433e-05,
"loss": 0.5206,
"step": 9300
},
{
"epoch": 0.2,
"learning_rate": 1.866351877004473e-05,
"loss": 0.5231,
"step": 9400
},
{
"epoch": 0.2,
"learning_rate": 1.8649296345548026e-05,
"loss": 0.5134,
"step": 9500
},
{
"epoch": 0.2,
"learning_rate": 1.8635073921051325e-05,
"loss": 0.5113,
"step": 9600
},
{
"epoch": 0.21,
"learning_rate": 1.862085149655462e-05,
"loss": 0.5147,
"step": 9700
},
{
"epoch": 0.21,
"learning_rate": 1.8606629072057914e-05,
"loss": 0.5255,
"step": 9800
},
{
"epoch": 0.21,
"learning_rate": 1.8592406647561213e-05,
"loss": 0.5106,
"step": 9900
},
{
"epoch": 0.21,
"learning_rate": 1.8578184223064508e-05,
"loss": 0.5083,
"step": 10000
},
{
"epoch": 0.21,
"eval_loss": 0.5610331296920776,
"eval_runtime": 34.9828,
"eval_samples_per_second": 142.927,
"eval_steps_per_second": 1.143,
"step": 10000
},
{
"epoch": 0.22,
"learning_rate": 1.8563961798567803e-05,
"loss": 0.5158,
"step": 10100
},
{
"epoch": 0.22,
"learning_rate": 1.8549739374071098e-05,
"loss": 0.5055,
"step": 10200
},
{
"epoch": 0.22,
"learning_rate": 1.8535516949574396e-05,
"loss": 0.5006,
"step": 10300
},
{
"epoch": 0.22,
"learning_rate": 1.852129452507769e-05,
"loss": 0.507,
"step": 10400
},
{
"epoch": 0.22,
"learning_rate": 1.8507072100580986e-05,
"loss": 0.4954,
"step": 10500
},
{
"epoch": 0.23,
"learning_rate": 1.8492849676084284e-05,
"loss": 0.5027,
"step": 10600
},
{
"epoch": 0.23,
"learning_rate": 1.847862725158758e-05,
"loss": 0.4875,
"step": 10700
},
{
"epoch": 0.23,
"learning_rate": 1.8464404827090874e-05,
"loss": 0.4911,
"step": 10800
},
{
"epoch": 0.23,
"learning_rate": 1.8450182402594173e-05,
"loss": 0.483,
"step": 10900
},
{
"epoch": 0.23,
"learning_rate": 1.8435959978097468e-05,
"loss": 0.4891,
"step": 11000
},
{
"epoch": 0.24,
"learning_rate": 1.8421737553600766e-05,
"loss": 0.4911,
"step": 11100
},
{
"epoch": 0.24,
"learning_rate": 1.8407515129104058e-05,
"loss": 0.4742,
"step": 11200
},
{
"epoch": 0.24,
"learning_rate": 1.8393292704607356e-05,
"loss": 0.4959,
"step": 11300
},
{
"epoch": 0.24,
"learning_rate": 1.837907028011065e-05,
"loss": 0.4959,
"step": 11400
},
{
"epoch": 0.25,
"learning_rate": 1.8364847855613946e-05,
"loss": 0.4798,
"step": 11500
},
{
"epoch": 0.25,
"learning_rate": 1.8350625431117244e-05,
"loss": 0.4927,
"step": 11600
},
{
"epoch": 0.25,
"learning_rate": 1.833640300662054e-05,
"loss": 0.4824,
"step": 11700
},
{
"epoch": 0.25,
"learning_rate": 1.8322180582123838e-05,
"loss": 0.4811,
"step": 11800
},
{
"epoch": 0.25,
"learning_rate": 1.8307958157627133e-05,
"loss": 0.4781,
"step": 11900
},
{
"epoch": 0.26,
"learning_rate": 1.8293735733130428e-05,
"loss": 0.4705,
"step": 12000
},
{
"epoch": 0.26,
"learning_rate": 1.8279513308633726e-05,
"loss": 0.4633,
"step": 12100
},
{
"epoch": 0.26,
"learning_rate": 1.826529088413702e-05,
"loss": 0.4809,
"step": 12200
},
{
"epoch": 0.26,
"learning_rate": 1.8251068459640316e-05,
"loss": 0.4801,
"step": 12300
},
{
"epoch": 0.26,
"learning_rate": 1.823684603514361e-05,
"loss": 0.4813,
"step": 12400
},
{
"epoch": 0.27,
"learning_rate": 1.822262361064691e-05,
"loss": 0.4682,
"step": 12500
},
{
"epoch": 0.27,
"learning_rate": 1.8208401186150204e-05,
"loss": 0.4745,
"step": 12600
},
{
"epoch": 0.27,
"learning_rate": 1.81941787616535e-05,
"loss": 0.4631,
"step": 12700
},
{
"epoch": 0.27,
"learning_rate": 1.8179956337156798e-05,
"loss": 0.4733,
"step": 12800
},
{
"epoch": 0.28,
"learning_rate": 1.8165733912660093e-05,
"loss": 0.4687,
"step": 12900
},
{
"epoch": 0.28,
"learning_rate": 1.8151511488163388e-05,
"loss": 0.4685,
"step": 13000
},
{
"epoch": 0.28,
"learning_rate": 1.8137289063666686e-05,
"loss": 0.4638,
"step": 13100
},
{
"epoch": 0.28,
"learning_rate": 1.812306663916998e-05,
"loss": 0.4482,
"step": 13200
},
{
"epoch": 0.28,
"learning_rate": 1.810884421467328e-05,
"loss": 0.4452,
"step": 13300
},
{
"epoch": 0.29,
"learning_rate": 1.809462179017657e-05,
"loss": 0.4553,
"step": 13400
},
{
"epoch": 0.29,
"learning_rate": 1.808039936567987e-05,
"loss": 0.4498,
"step": 13500
},
{
"epoch": 0.29,
"learning_rate": 1.8066176941183164e-05,
"loss": 0.4468,
"step": 13600
},
{
"epoch": 0.29,
"learning_rate": 1.805195451668646e-05,
"loss": 0.4466,
"step": 13700
},
{
"epoch": 0.29,
"learning_rate": 1.8037732092189757e-05,
"loss": 0.4465,
"step": 13800
},
{
"epoch": 0.3,
"learning_rate": 1.8023509667693052e-05,
"loss": 0.4426,
"step": 13900
},
{
"epoch": 0.3,
"learning_rate": 1.800928724319635e-05,
"loss": 0.4466,
"step": 14000
},
{
"epoch": 0.3,
"learning_rate": 1.7995064818699646e-05,
"loss": 0.4444,
"step": 14100
},
{
"epoch": 0.3,
"learning_rate": 1.798084239420294e-05,
"loss": 0.4445,
"step": 14200
},
{
"epoch": 0.31,
"learning_rate": 1.796661996970624e-05,
"loss": 0.4395,
"step": 14300
},
{
"epoch": 0.31,
"learning_rate": 1.795239754520953e-05,
"loss": 0.4425,
"step": 14400
},
{
"epoch": 0.31,
"learning_rate": 1.793817512071283e-05,
"loss": 0.4334,
"step": 14500
},
{
"epoch": 0.31,
"learning_rate": 1.7923952696216124e-05,
"loss": 0.4411,
"step": 14600
},
{
"epoch": 0.31,
"learning_rate": 1.7909730271719422e-05,
"loss": 0.4481,
"step": 14700
},
{
"epoch": 0.32,
"learning_rate": 1.7895507847222717e-05,
"loss": 0.431,
"step": 14800
},
{
"epoch": 0.32,
"learning_rate": 1.7881285422726012e-05,
"loss": 0.451,
"step": 14900
},
{
"epoch": 0.32,
"learning_rate": 1.786706299822931e-05,
"loss": 0.4406,
"step": 15000
},
{
"epoch": 0.32,
"eval_loss": 0.47009751200675964,
"eval_runtime": 35.0436,
"eval_samples_per_second": 142.68,
"eval_steps_per_second": 1.141,
"step": 15000
},
{
"epoch": 0.32,
"learning_rate": 1.7852840573732606e-05,
"loss": 0.4358,
"step": 15100
},
{
"epoch": 0.32,
"learning_rate": 1.78386181492359e-05,
"loss": 0.4333,
"step": 15200
},
{
"epoch": 0.33,
"learning_rate": 1.78243957247392e-05,
"loss": 0.4349,
"step": 15300
},
{
"epoch": 0.33,
"learning_rate": 1.7810173300242494e-05,
"loss": 0.4317,
"step": 15400
},
{
"epoch": 0.33,
"learning_rate": 1.779595087574579e-05,
"loss": 0.4248,
"step": 15500
},
{
"epoch": 0.33,
"learning_rate": 1.7781728451249084e-05,
"loss": 0.433,
"step": 15600
},
{
"epoch": 0.33,
"learning_rate": 1.7767506026752382e-05,
"loss": 0.4275,
"step": 15700
},
{
"epoch": 0.34,
"learning_rate": 1.7753283602255677e-05,
"loss": 0.4299,
"step": 15800
},
{
"epoch": 0.34,
"learning_rate": 1.7739061177758972e-05,
"loss": 0.4294,
"step": 15900
},
{
"epoch": 0.34,
"learning_rate": 1.772483875326227e-05,
"loss": 0.416,
"step": 16000
},
{
"epoch": 0.34,
"learning_rate": 1.7710616328765566e-05,
"loss": 0.4146,
"step": 16100
},
{
"epoch": 0.35,
"learning_rate": 1.7696393904268864e-05,
"loss": 0.4212,
"step": 16200
},
{
"epoch": 0.35,
"learning_rate": 1.768217147977216e-05,
"loss": 0.4201,
"step": 16300
},
{
"epoch": 0.35,
"learning_rate": 1.7667949055275454e-05,
"loss": 0.4147,
"step": 16400
},
{
"epoch": 0.35,
"learning_rate": 1.7653726630778752e-05,
"loss": 0.421,
"step": 16500
},
{
"epoch": 0.35,
"learning_rate": 1.7639504206282044e-05,
"loss": 0.4089,
"step": 16600
},
{
"epoch": 0.36,
"learning_rate": 1.7625281781785342e-05,
"loss": 0.4118,
"step": 16700
},
{
"epoch": 0.36,
"learning_rate": 1.7611059357288637e-05,
"loss": 0.4112,
"step": 16800
},
{
"epoch": 0.36,
"learning_rate": 1.7596836932791936e-05,
"loss": 0.408,
"step": 16900
},
{
"epoch": 0.36,
"learning_rate": 1.758261450829523e-05,
"loss": 0.4174,
"step": 17000
},
{
"epoch": 0.36,
"learning_rate": 1.7568392083798525e-05,
"loss": 0.4072,
"step": 17100
},
{
"epoch": 0.37,
"learning_rate": 1.7554169659301824e-05,
"loss": 0.4136,
"step": 17200
},
{
"epoch": 0.37,
"learning_rate": 1.753994723480512e-05,
"loss": 0.395,
"step": 17300
},
{
"epoch": 0.37,
"learning_rate": 1.7525724810308414e-05,
"loss": 0.4179,
"step": 17400
},
{
"epoch": 0.37,
"learning_rate": 1.7511502385811712e-05,
"loss": 0.4104,
"step": 17500
},
{
"epoch": 0.38,
"learning_rate": 1.7497279961315007e-05,
"loss": 0.404,
"step": 17600
},
{
"epoch": 0.38,
"learning_rate": 1.7483057536818302e-05,
"loss": 0.4011,
"step": 17700
},
{
"epoch": 0.38,
"learning_rate": 1.7468835112321597e-05,
"loss": 0.3986,
"step": 17800
},
{
"epoch": 0.38,
"learning_rate": 1.7454612687824895e-05,
"loss": 0.399,
"step": 17900
},
{
"epoch": 0.38,
"learning_rate": 1.744039026332819e-05,
"loss": 0.3954,
"step": 18000
},
{
"epoch": 0.39,
"learning_rate": 1.7426167838831485e-05,
"loss": 0.4122,
"step": 18100
},
{
"epoch": 0.39,
"learning_rate": 1.7411945414334784e-05,
"loss": 0.3826,
"step": 18200
},
{
"epoch": 0.39,
"learning_rate": 1.739772298983808e-05,
"loss": 0.3874,
"step": 18300
},
{
"epoch": 0.39,
"learning_rate": 1.7383500565341377e-05,
"loss": 0.3815,
"step": 18400
},
{
"epoch": 0.39,
"learning_rate": 1.7369278140844672e-05,
"loss": 0.3863,
"step": 18500
},
{
"epoch": 0.4,
"learning_rate": 1.7355055716347967e-05,
"loss": 0.3827,
"step": 18600
},
{
"epoch": 0.4,
"learning_rate": 1.7340833291851262e-05,
"loss": 0.3944,
"step": 18700
},
{
"epoch": 0.4,
"learning_rate": 1.7326610867354557e-05,
"loss": 0.392,
"step": 18800
},
{
"epoch": 0.4,
"learning_rate": 1.7312388442857855e-05,
"loss": 0.3948,
"step": 18900
},
{
"epoch": 0.41,
"learning_rate": 1.729816601836115e-05,
"loss": 0.382,
"step": 19000
},
{
"epoch": 0.41,
"learning_rate": 1.728394359386445e-05,
"loss": 0.3803,
"step": 19100
},
{
"epoch": 0.41,
"learning_rate": 1.7269721169367744e-05,
"loss": 0.3795,
"step": 19200
},
{
"epoch": 0.41,
"learning_rate": 1.725549874487104e-05,
"loss": 0.383,
"step": 19300
},
{
"epoch": 0.41,
"learning_rate": 1.7241276320374337e-05,
"loss": 0.3865,
"step": 19400
},
{
"epoch": 0.42,
"learning_rate": 1.7227053895877632e-05,
"loss": 0.3755,
"step": 19500
},
{
"epoch": 0.42,
"learning_rate": 1.7212831471380927e-05,
"loss": 0.3783,
"step": 19600
},
{
"epoch": 0.42,
"learning_rate": 1.7198609046884225e-05,
"loss": 0.3822,
"step": 19700
},
{
"epoch": 0.42,
"learning_rate": 1.718438662238752e-05,
"loss": 0.375,
"step": 19800
},
{
"epoch": 0.42,
"learning_rate": 1.7170164197890815e-05,
"loss": 0.3823,
"step": 19900
},
{
"epoch": 0.43,
"learning_rate": 1.715594177339411e-05,
"loss": 0.3847,
"step": 20000
},
{
"epoch": 0.43,
"eval_loss": 0.4110700190067291,
"eval_runtime": 35.0314,
"eval_samples_per_second": 142.729,
"eval_steps_per_second": 1.142,
"step": 20000
},
{
"epoch": 0.43,
"learning_rate": 1.714171934889741e-05,
"loss": 0.3818,
"step": 20100
},
{
"epoch": 0.43,
"learning_rate": 1.7127496924400704e-05,
"loss": 0.3802,
"step": 20200
},
{
"epoch": 0.43,
"learning_rate": 1.7113274499904e-05,
"loss": 0.3726,
"step": 20300
},
{
"epoch": 0.44,
"learning_rate": 1.7099052075407297e-05,
"loss": 0.3822,
"step": 20400
},
{
"epoch": 0.44,
"learning_rate": 1.7084829650910592e-05,
"loss": 0.363,
"step": 20500
},
{
"epoch": 0.44,
"learning_rate": 1.707060722641389e-05,
"loss": 0.3604,
"step": 20600
},
{
"epoch": 0.44,
"learning_rate": 1.7056384801917185e-05,
"loss": 0.3665,
"step": 20700
},
{
"epoch": 0.44,
"learning_rate": 1.704216237742048e-05,
"loss": 0.3575,
"step": 20800
},
{
"epoch": 0.45,
"learning_rate": 1.7027939952923775e-05,
"loss": 0.3655,
"step": 20900
},
{
"epoch": 0.45,
"learning_rate": 1.701371752842707e-05,
"loss": 0.3718,
"step": 21000
},
{
"epoch": 0.45,
"learning_rate": 1.699949510393037e-05,
"loss": 0.3667,
"step": 21100
},
{
"epoch": 0.45,
"learning_rate": 1.6985272679433663e-05,
"loss": 0.3511,
"step": 21200
},
{
"epoch": 0.45,
"learning_rate": 1.6971050254936962e-05,
"loss": 0.3644,
"step": 21300
},
{
"epoch": 0.46,
"learning_rate": 1.6956827830440257e-05,
"loss": 0.3638,
"step": 21400
},
{
"epoch": 0.46,
"learning_rate": 1.6942605405943552e-05,
"loss": 0.3637,
"step": 21500
},
{
"epoch": 0.46,
"learning_rate": 1.692838298144685e-05,
"loss": 0.3625,
"step": 21600
},
{
"epoch": 0.46,
"learning_rate": 1.6914160556950145e-05,
"loss": 0.3501,
"step": 21700
},
{
"epoch": 0.47,
"learning_rate": 1.689993813245344e-05,
"loss": 0.3604,
"step": 21800
},
{
"epoch": 0.47,
"learning_rate": 1.6885715707956735e-05,
"loss": 0.3615,
"step": 21900
},
{
"epoch": 0.47,
"learning_rate": 1.6871493283460033e-05,
"loss": 0.3558,
"step": 22000
},
{
"epoch": 0.47,
"learning_rate": 1.685727085896333e-05,
"loss": 0.3644,
"step": 22100
},
{
"epoch": 0.47,
"learning_rate": 1.6843048434466623e-05,
"loss": 0.3564,
"step": 22200
},
{
"epoch": 0.48,
"learning_rate": 1.682882600996992e-05,
"loss": 0.35,
"step": 22300
},
{
"epoch": 0.48,
"learning_rate": 1.6814603585473217e-05,
"loss": 0.3559,
"step": 22400
},
{
"epoch": 0.48,
"learning_rate": 1.680038116097651e-05,
"loss": 0.3585,
"step": 22500
},
{
"epoch": 0.48,
"learning_rate": 1.678615873647981e-05,
"loss": 0.3544,
"step": 22600
},
{
"epoch": 0.48,
"learning_rate": 1.6771936311983105e-05,
"loss": 0.3459,
"step": 22700
},
{
"epoch": 0.49,
"learning_rate": 1.6757713887486403e-05,
"loss": 0.3521,
"step": 22800
},
{
"epoch": 0.49,
"learning_rate": 1.6743491462989698e-05,
"loss": 0.3431,
"step": 22900
},
{
"epoch": 0.49,
"learning_rate": 1.6729269038492993e-05,
"loss": 0.337,
"step": 23000
},
{
"epoch": 0.49,
"learning_rate": 1.6715046613996288e-05,
"loss": 0.3376,
"step": 23100
},
{
"epoch": 0.49,
"learning_rate": 1.6700824189499583e-05,
"loss": 0.3345,
"step": 23200
},
{
"epoch": 0.5,
"learning_rate": 1.668660176500288e-05,
"loss": 0.3327,
"step": 23300
},
{
"epoch": 0.5,
"learning_rate": 1.6672379340506177e-05,
"loss": 0.3431,
"step": 23400
},
{
"epoch": 0.5,
"learning_rate": 1.6658156916009475e-05,
"loss": 0.3375,
"step": 23500
},
{
"epoch": 0.5,
"learning_rate": 1.664393449151277e-05,
"loss": 0.3369,
"step": 23600
},
{
"epoch": 0.51,
"learning_rate": 1.6629712067016065e-05,
"loss": 0.3397,
"step": 23700
},
{
"epoch": 0.51,
"learning_rate": 1.6615489642519363e-05,
"loss": 0.338,
"step": 23800
},
{
"epoch": 0.51,
"learning_rate": 1.6601267218022658e-05,
"loss": 0.3311,
"step": 23900
},
{
"epoch": 0.51,
"learning_rate": 1.6587044793525953e-05,
"loss": 0.3369,
"step": 24000
},
{
"epoch": 0.51,
"learning_rate": 1.6572822369029248e-05,
"loss": 0.3351,
"step": 24100
},
{
"epoch": 0.52,
"learning_rate": 1.6558599944532547e-05,
"loss": 0.3428,
"step": 24200
},
{
"epoch": 0.52,
"learning_rate": 1.654437752003584e-05,
"loss": 0.3324,
"step": 24300
},
{
"epoch": 0.52,
"learning_rate": 1.6530155095539136e-05,
"loss": 0.3273,
"step": 24400
},
{
"epoch": 0.52,
"learning_rate": 1.6515932671042435e-05,
"loss": 0.3262,
"step": 24500
},
{
"epoch": 0.52,
"learning_rate": 1.650171024654573e-05,
"loss": 0.3343,
"step": 24600
},
{
"epoch": 0.53,
"learning_rate": 1.6487487822049025e-05,
"loss": 0.3269,
"step": 24700
},
{
"epoch": 0.53,
"learning_rate": 1.6473265397552323e-05,
"loss": 0.336,
"step": 24800
},
{
"epoch": 0.53,
"learning_rate": 1.6459042973055618e-05,
"loss": 0.3316,
"step": 24900
},
{
"epoch": 0.53,
"learning_rate": 1.6444820548558916e-05,
"loss": 0.3257,
"step": 25000
},
{
"epoch": 0.53,
"eval_loss": 0.36412838101387024,
"eval_runtime": 34.9354,
"eval_samples_per_second": 143.121,
"eval_steps_per_second": 1.145,
"step": 25000
},
{
"epoch": 0.54,
"learning_rate": 1.6430598124062208e-05,
"loss": 0.3235,
"step": 25100
},
{
"epoch": 0.54,
"learning_rate": 1.6416375699565506e-05,
"loss": 0.3294,
"step": 25200
},
{
"epoch": 0.54,
"learning_rate": 1.64021532750688e-05,
"loss": 0.3268,
"step": 25300
},
{
"epoch": 0.54,
"learning_rate": 1.6387930850572096e-05,
"loss": 0.3162,
"step": 25400
},
{
"epoch": 0.54,
"learning_rate": 1.6373708426075395e-05,
"loss": 0.3199,
"step": 25500
},
{
"epoch": 0.55,
"learning_rate": 1.635948600157869e-05,
"loss": 0.3168,
"step": 25600
},
{
"epoch": 0.55,
"learning_rate": 1.6345263577081988e-05,
"loss": 0.3209,
"step": 25700
},
{
"epoch": 0.55,
"learning_rate": 1.6331041152585283e-05,
"loss": 0.3097,
"step": 25800
},
{
"epoch": 0.55,
"learning_rate": 1.6316818728088578e-05,
"loss": 0.3137,
"step": 25900
},
{
"epoch": 0.55,
"learning_rate": 1.6302596303591876e-05,
"loss": 0.3095,
"step": 26000
},
{
"epoch": 0.56,
"learning_rate": 1.628837387909517e-05,
"loss": 0.3205,
"step": 26100
},
{
"epoch": 0.56,
"learning_rate": 1.6274151454598466e-05,
"loss": 0.3141,
"step": 26200
},
{
"epoch": 0.56,
"learning_rate": 1.625992903010176e-05,
"loss": 0.3177,
"step": 26300
},
{
"epoch": 0.56,
"learning_rate": 1.624570660560506e-05,
"loss": 0.3198,
"step": 26400
},
{
"epoch": 0.57,
"learning_rate": 1.6231484181108355e-05,
"loss": 0.3187,
"step": 26500
},
{
"epoch": 0.57,
"learning_rate": 1.621726175661165e-05,
"loss": 0.3208,
"step": 26600
},
{
"epoch": 0.57,
"learning_rate": 1.6203039332114948e-05,
"loss": 0.3152,
"step": 26700
},
{
"epoch": 0.57,
"learning_rate": 1.6188816907618243e-05,
"loss": 0.3194,
"step": 26800
},
{
"epoch": 0.57,
"learning_rate": 1.6174594483121538e-05,
"loss": 0.3199,
"step": 26900
},
{
"epoch": 0.58,
"learning_rate": 1.6160372058624836e-05,
"loss": 0.3181,
"step": 27000
},
{
"epoch": 0.58,
"learning_rate": 1.614614963412813e-05,
"loss": 0.3185,
"step": 27100
},
{
"epoch": 0.58,
"learning_rate": 1.6131927209631426e-05,
"loss": 0.3178,
"step": 27200
},
{
"epoch": 0.58,
"learning_rate": 1.611770478513472e-05,
"loss": 0.3056,
"step": 27300
},
{
"epoch": 0.58,
"learning_rate": 1.610348236063802e-05,
"loss": 0.2985,
"step": 27400
},
{
"epoch": 0.59,
"learning_rate": 1.6089259936141315e-05,
"loss": 0.3099,
"step": 27500
},
{
"epoch": 0.59,
"learning_rate": 1.607503751164461e-05,
"loss": 0.3004,
"step": 27600
},
{
"epoch": 0.59,
"learning_rate": 1.6060815087147908e-05,
"loss": 0.2997,
"step": 27700
},
{
"epoch": 0.59,
"learning_rate": 1.6046592662651203e-05,
"loss": 0.3124,
"step": 27800
},
{
"epoch": 0.6,
"learning_rate": 1.60323702381545e-05,
"loss": 0.3008,
"step": 27900
},
{
"epoch": 0.6,
"learning_rate": 1.6018147813657796e-05,
"loss": 0.3086,
"step": 28000
},
{
"epoch": 0.6,
"learning_rate": 1.600392538916109e-05,
"loss": 0.3028,
"step": 28100
},
{
"epoch": 0.6,
"learning_rate": 1.598970296466439e-05,
"loss": 0.3035,
"step": 28200
},
{
"epoch": 0.6,
"learning_rate": 1.597548054016768e-05,
"loss": 0.2954,
"step": 28300
},
{
"epoch": 0.61,
"learning_rate": 1.596125811567098e-05,
"loss": 0.2794,
"step": 28400
},
{
"epoch": 0.61,
"learning_rate": 1.5947035691174274e-05,
"loss": 0.3009,
"step": 28500
},
{
"epoch": 0.61,
"learning_rate": 1.5932813266677573e-05,
"loss": 0.2945,
"step": 28600
},
{
"epoch": 0.61,
"learning_rate": 1.5918590842180868e-05,
"loss": 0.3038,
"step": 28700
},
{
"epoch": 0.61,
"learning_rate": 1.5904368417684163e-05,
"loss": 0.2877,
"step": 28800
},
{
"epoch": 0.62,
"learning_rate": 1.589014599318746e-05,
"loss": 0.2928,
"step": 28900
},
{
"epoch": 0.62,
"learning_rate": 1.5875923568690756e-05,
"loss": 0.298,
"step": 29000
},
{
"epoch": 0.62,
"learning_rate": 1.586170114419405e-05,
"loss": 0.2985,
"step": 29100
},
{
"epoch": 0.62,
"learning_rate": 1.584747871969735e-05,
"loss": 0.2868,
"step": 29200
},
{
"epoch": 0.63,
"learning_rate": 1.5833256295200644e-05,
"loss": 0.2935,
"step": 29300
},
{
"epoch": 0.63,
"learning_rate": 1.581903387070394e-05,
"loss": 0.2924,
"step": 29400
},
{
"epoch": 0.63,
"learning_rate": 1.5804811446207234e-05,
"loss": 0.2911,
"step": 29500
},
{
"epoch": 0.63,
"learning_rate": 1.5790589021710533e-05,
"loss": 0.2947,
"step": 29600
},
{
"epoch": 0.63,
"learning_rate": 1.5776366597213828e-05,
"loss": 0.2872,
"step": 29700
},
{
"epoch": 0.64,
"learning_rate": 1.5762144172717123e-05,
"loss": 0.2782,
"step": 29800
},
{
"epoch": 0.64,
"learning_rate": 1.574792174822042e-05,
"loss": 0.2877,
"step": 29900
},
{
"epoch": 0.64,
"learning_rate": 1.5733699323723716e-05,
"loss": 0.2874,
"step": 30000
},
{
"epoch": 0.64,
"eval_loss": 0.3142920732498169,
"eval_runtime": 34.9962,
"eval_samples_per_second": 142.872,
"eval_steps_per_second": 1.143,
"step": 30000
},
{
"epoch": 0.64,
"learning_rate": 1.5719476899227014e-05,
"loss": 0.2884,
"step": 30100
},
{
"epoch": 0.64,
"learning_rate": 1.570525447473031e-05,
"loss": 0.2756,
"step": 30200
},
{
"epoch": 0.65,
"learning_rate": 1.5691032050233604e-05,
"loss": 0.2847,
"step": 30300
},
{
"epoch": 0.65,
"learning_rate": 1.56768096257369e-05,
"loss": 0.2742,
"step": 30400
},
{
"epoch": 0.65,
"learning_rate": 1.5662587201240194e-05,
"loss": 0.2818,
"step": 30500
},
{
"epoch": 0.65,
"learning_rate": 1.5648364776743493e-05,
"loss": 0.2804,
"step": 30600
},
{
"epoch": 0.65,
"learning_rate": 1.5634142352246788e-05,
"loss": 0.2729,
"step": 30700
},
{
"epoch": 0.66,
"learning_rate": 1.5619919927750086e-05,
"loss": 0.2771,
"step": 30800
},
{
"epoch": 0.66,
"learning_rate": 1.560569750325338e-05,
"loss": 0.2863,
"step": 30900
},
{
"epoch": 0.66,
"learning_rate": 1.5591475078756676e-05,
"loss": 0.2791,
"step": 31000
},
{
"epoch": 0.66,
"learning_rate": 1.5577252654259974e-05,
"loss": 0.28,
"step": 31100
},
{
"epoch": 0.67,
"learning_rate": 1.556303022976327e-05,
"loss": 0.2742,
"step": 31200
},
{
"epoch": 0.67,
"learning_rate": 1.5548807805266564e-05,
"loss": 0.264,
"step": 31300
},
{
"epoch": 0.67,
"learning_rate": 1.5534585380769863e-05,
"loss": 0.2747,
"step": 31400
},
{
"epoch": 0.67,
"learning_rate": 1.5520362956273158e-05,
"loss": 0.2704,
"step": 31500
},
{
"epoch": 0.67,
"learning_rate": 1.5506140531776452e-05,
"loss": 0.2846,
"step": 31600
},
{
"epoch": 0.68,
"learning_rate": 1.5491918107279747e-05,
"loss": 0.2753,
"step": 31700
},
{
"epoch": 0.68,
"learning_rate": 1.5477695682783046e-05,
"loss": 0.2654,
"step": 31800
},
{
"epoch": 0.68,
"learning_rate": 1.546347325828634e-05,
"loss": 0.2666,
"step": 31900
},
{
"epoch": 0.68,
"learning_rate": 1.5449250833789636e-05,
"loss": 0.2661,
"step": 32000
},
{
"epoch": 0.68,
"learning_rate": 1.5435028409292934e-05,
"loss": 0.2738,
"step": 32100
},
{
"epoch": 0.69,
"learning_rate": 1.542080598479623e-05,
"loss": 0.2766,
"step": 32200
},
{
"epoch": 0.69,
"learning_rate": 1.5406583560299527e-05,
"loss": 0.2744,
"step": 32300
},
{
"epoch": 0.69,
"learning_rate": 1.5392361135802822e-05,
"loss": 0.2701,
"step": 32400
},
{
"epoch": 0.69,
"learning_rate": 1.5378138711306117e-05,
"loss": 0.2772,
"step": 32500
},
{
"epoch": 0.7,
"learning_rate": 1.5363916286809412e-05,
"loss": 0.2788,
"step": 32600
},
{
"epoch": 0.7,
"learning_rate": 1.5349693862312707e-05,
"loss": 0.2747,
"step": 32700
},
{
"epoch": 0.7,
"learning_rate": 1.5335471437816006e-05,
"loss": 0.2543,
"step": 32800
},
{
"epoch": 0.7,
"learning_rate": 1.53212490133193e-05,
"loss": 0.2654,
"step": 32900
},
{
"epoch": 0.7,
"learning_rate": 1.53070265888226e-05,
"loss": 0.2593,
"step": 33000
},
{
"epoch": 0.71,
"learning_rate": 1.5292804164325894e-05,
"loss": 0.2631,
"step": 33100
},
{
"epoch": 0.71,
"learning_rate": 1.527858173982919e-05,
"loss": 0.2654,
"step": 33200
},
{
"epoch": 0.71,
"learning_rate": 1.5264359315332487e-05,
"loss": 0.2684,
"step": 33300
},
{
"epoch": 0.71,
"learning_rate": 1.5250136890835782e-05,
"loss": 0.2575,
"step": 33400
},
{
"epoch": 0.71,
"learning_rate": 1.5235914466339079e-05,
"loss": 0.2711,
"step": 33500
},
{
"epoch": 0.72,
"learning_rate": 1.5221692041842372e-05,
"loss": 0.2589,
"step": 33600
},
{
"epoch": 0.72,
"learning_rate": 1.5207469617345669e-05,
"loss": 0.2659,
"step": 33700
},
{
"epoch": 0.72,
"learning_rate": 1.5193247192848966e-05,
"loss": 0.2513,
"step": 33800
},
{
"epoch": 0.72,
"learning_rate": 1.5179024768352262e-05,
"loss": 0.2681,
"step": 33900
},
{
"epoch": 0.73,
"learning_rate": 1.5164802343855557e-05,
"loss": 0.2528,
"step": 34000
},
{
"epoch": 0.73,
"learning_rate": 1.5150579919358854e-05,
"loss": 0.251,
"step": 34100
},
{
"epoch": 0.73,
"learning_rate": 1.513635749486215e-05,
"loss": 0.2487,
"step": 34200
},
{
"epoch": 0.73,
"learning_rate": 1.5122135070365447e-05,
"loss": 0.2498,
"step": 34300
},
{
"epoch": 0.73,
"learning_rate": 1.5107912645868742e-05,
"loss": 0.2506,
"step": 34400
},
{
"epoch": 0.74,
"learning_rate": 1.5093690221372039e-05,
"loss": 0.2549,
"step": 34500
},
{
"epoch": 0.74,
"learning_rate": 1.5079467796875336e-05,
"loss": 0.2482,
"step": 34600
},
{
"epoch": 0.74,
"learning_rate": 1.5065245372378629e-05,
"loss": 0.256,
"step": 34700
},
{
"epoch": 0.74,
"learning_rate": 1.5051022947881926e-05,
"loss": 0.2503,
"step": 34800
},
{
"epoch": 0.74,
"learning_rate": 1.5036800523385222e-05,
"loss": 0.2569,
"step": 34900
},
{
"epoch": 0.75,
"learning_rate": 1.5022578098888519e-05,
"loss": 0.2558,
"step": 35000
},
{
"epoch": 0.75,
"eval_loss": 0.2726137340068817,
"eval_runtime": 35.0016,
"eval_samples_per_second": 142.85,
"eval_steps_per_second": 1.143,
"step": 35000
},
{
"epoch": 0.75,
"learning_rate": 1.5008355674391814e-05,
"loss": 0.244,
"step": 35100
},
{
"epoch": 0.75,
"learning_rate": 1.499413324989511e-05,
"loss": 0.2551,
"step": 35200
},
{
"epoch": 0.75,
"learning_rate": 1.4979910825398407e-05,
"loss": 0.2396,
"step": 35300
},
{
"epoch": 0.76,
"learning_rate": 1.4965688400901704e-05,
"loss": 0.2579,
"step": 35400
},
{
"epoch": 0.76,
"learning_rate": 1.4951465976404999e-05,
"loss": 0.2444,
"step": 35500
},
{
"epoch": 0.76,
"learning_rate": 1.4937243551908295e-05,
"loss": 0.239,
"step": 35600
},
{
"epoch": 0.76,
"learning_rate": 1.4923021127411592e-05,
"loss": 0.2381,
"step": 35700
},
{
"epoch": 0.76,
"learning_rate": 1.4908798702914885e-05,
"loss": 0.2578,
"step": 35800
},
{
"epoch": 0.77,
"learning_rate": 1.4894576278418182e-05,
"loss": 0.2467,
"step": 35900
},
{
"epoch": 0.77,
"learning_rate": 1.4880353853921479e-05,
"loss": 0.238,
"step": 36000
},
{
"epoch": 0.77,
"learning_rate": 1.4866131429424775e-05,
"loss": 0.2441,
"step": 36100
},
{
"epoch": 0.77,
"learning_rate": 1.485190900492807e-05,
"loss": 0.2445,
"step": 36200
},
{
"epoch": 0.77,
"learning_rate": 1.4837686580431367e-05,
"loss": 0.2464,
"step": 36300
},
{
"epoch": 0.78,
"learning_rate": 1.4823464155934664e-05,
"loss": 0.2414,
"step": 36400
},
{
"epoch": 0.78,
"learning_rate": 1.480924173143796e-05,
"loss": 0.2462,
"step": 36500
},
{
"epoch": 0.78,
"learning_rate": 1.4795019306941255e-05,
"loss": 0.2411,
"step": 36600
},
{
"epoch": 0.78,
"learning_rate": 1.4780796882444552e-05,
"loss": 0.2378,
"step": 36700
},
{
"epoch": 0.79,
"learning_rate": 1.4766574457947847e-05,
"loss": 0.2298,
"step": 36800
},
{
"epoch": 0.79,
"learning_rate": 1.4752352033451142e-05,
"loss": 0.2382,
"step": 36900
},
{
"epoch": 0.79,
"learning_rate": 1.4738129608954439e-05,
"loss": 0.235,
"step": 37000
},
{
"epoch": 0.79,
"learning_rate": 1.4723907184457735e-05,
"loss": 0.2321,
"step": 37100
},
{
"epoch": 0.79,
"learning_rate": 1.4709684759961032e-05,
"loss": 0.229,
"step": 37200
},
{
"epoch": 0.8,
"learning_rate": 1.4695462335464327e-05,
"loss": 0.2411,
"step": 37300
},
{
"epoch": 0.8,
"learning_rate": 1.4681239910967624e-05,
"loss": 0.2269,
"step": 37400
},
{
"epoch": 0.8,
"learning_rate": 1.466701748647092e-05,
"loss": 0.2357,
"step": 37500
},
{
"epoch": 0.8,
"learning_rate": 1.4652795061974217e-05,
"loss": 0.2324,
"step": 37600
},
{
"epoch": 0.8,
"learning_rate": 1.4638572637477514e-05,
"loss": 0.2306,
"step": 37700
},
{
"epoch": 0.81,
"learning_rate": 1.4624350212980809e-05,
"loss": 0.2382,
"step": 37800
},
{
"epoch": 0.81,
"learning_rate": 1.4610127788484104e-05,
"loss": 0.2418,
"step": 37900
},
{
"epoch": 0.81,
"learning_rate": 1.4595905363987399e-05,
"loss": 0.2416,
"step": 38000
},
{
"epoch": 0.81,
"learning_rate": 1.4581682939490695e-05,
"loss": 0.2287,
"step": 38100
},
{
"epoch": 0.81,
"learning_rate": 1.4567460514993992e-05,
"loss": 0.2429,
"step": 38200
},
{
"epoch": 0.82,
"learning_rate": 1.4553238090497289e-05,
"loss": 0.2481,
"step": 38300
},
{
"epoch": 0.82,
"learning_rate": 1.4539015666000584e-05,
"loss": 0.2217,
"step": 38400
},
{
"epoch": 0.82,
"learning_rate": 1.452479324150388e-05,
"loss": 0.229,
"step": 38500
},
{
"epoch": 0.82,
"learning_rate": 1.4510570817007177e-05,
"loss": 0.2338,
"step": 38600
},
{
"epoch": 0.83,
"learning_rate": 1.4496348392510474e-05,
"loss": 0.2241,
"step": 38700
},
{
"epoch": 0.83,
"learning_rate": 1.448212596801377e-05,
"loss": 0.2144,
"step": 38800
},
{
"epoch": 0.83,
"learning_rate": 1.4467903543517065e-05,
"loss": 0.2331,
"step": 38900
},
{
"epoch": 0.83,
"learning_rate": 1.445368111902036e-05,
"loss": 0.2293,
"step": 39000
},
{
"epoch": 0.83,
"learning_rate": 1.4439458694523655e-05,
"loss": 0.2323,
"step": 39100
},
{
"epoch": 0.84,
"learning_rate": 1.4425236270026952e-05,
"loss": 0.218,
"step": 39200
},
{
"epoch": 0.84,
"learning_rate": 1.4411013845530248e-05,
"loss": 0.2286,
"step": 39300
},
{
"epoch": 0.84,
"learning_rate": 1.4396791421033545e-05,
"loss": 0.2325,
"step": 39400
},
{
"epoch": 0.84,
"learning_rate": 1.438256899653684e-05,
"loss": 0.2188,
"step": 39500
},
{
"epoch": 0.84,
"learning_rate": 1.4368346572040137e-05,
"loss": 0.2322,
"step": 39600
},
{
"epoch": 0.85,
"learning_rate": 1.4354124147543433e-05,
"loss": 0.2219,
"step": 39700
},
{
"epoch": 0.85,
"learning_rate": 1.433990172304673e-05,
"loss": 0.2218,
"step": 39800
},
{
"epoch": 0.85,
"learning_rate": 1.4325679298550027e-05,
"loss": 0.2253,
"step": 39900
},
{
"epoch": 0.85,
"learning_rate": 1.431145687405332e-05,
"loss": 0.2188,
"step": 40000
},
{
"epoch": 0.85,
"eval_loss": 0.24339932203292847,
"eval_runtime": 35.0062,
"eval_samples_per_second": 142.832,
"eval_steps_per_second": 1.143,
"step": 40000
},
{
"epoch": 0.86,
"learning_rate": 1.4297234449556617e-05,
"loss": 0.2144,
"step": 40100
},
{
"epoch": 0.86,
"learning_rate": 1.4283012025059912e-05,
"loss": 0.2282,
"step": 40200
},
{
"epoch": 0.86,
"learning_rate": 1.4268789600563208e-05,
"loss": 0.2155,
"step": 40300
},
{
"epoch": 0.86,
"learning_rate": 1.4254567176066505e-05,
"loss": 0.2153,
"step": 40400
},
{
"epoch": 0.86,
"learning_rate": 1.4240344751569802e-05,
"loss": 0.225,
"step": 40500
},
{
"epoch": 0.87,
"learning_rate": 1.4226122327073097e-05,
"loss": 0.2132,
"step": 40600
},
{
"epoch": 0.87,
"learning_rate": 1.4211899902576393e-05,
"loss": 0.2241,
"step": 40700
},
{
"epoch": 0.87,
"learning_rate": 1.419767747807969e-05,
"loss": 0.2194,
"step": 40800
},
{
"epoch": 0.87,
"learning_rate": 1.4183455053582987e-05,
"loss": 0.2102,
"step": 40900
},
{
"epoch": 0.87,
"learning_rate": 1.4169232629086283e-05,
"loss": 0.2138,
"step": 41000
},
{
"epoch": 0.88,
"learning_rate": 1.4155010204589577e-05,
"loss": 0.2193,
"step": 41100
},
{
"epoch": 0.88,
"learning_rate": 1.4140787780092873e-05,
"loss": 0.2218,
"step": 41200
},
{
"epoch": 0.88,
"learning_rate": 1.4126565355596168e-05,
"loss": 0.2193,
"step": 41300
},
{
"epoch": 0.88,
"learning_rate": 1.4112342931099465e-05,
"loss": 0.2218,
"step": 41400
},
{
"epoch": 0.89,
"learning_rate": 1.4098120506602762e-05,
"loss": 0.2172,
"step": 41500
},
{
"epoch": 0.89,
"learning_rate": 1.4083898082106058e-05,
"loss": 0.2143,
"step": 41600
},
{
"epoch": 0.89,
"learning_rate": 1.4069675657609353e-05,
"loss": 0.2181,
"step": 41700
},
{
"epoch": 0.89,
"learning_rate": 1.405545323311265e-05,
"loss": 0.2078,
"step": 41800
},
{
"epoch": 0.89,
"learning_rate": 1.4041230808615947e-05,
"loss": 0.2102,
"step": 41900
},
{
"epoch": 0.9,
"learning_rate": 1.4027008384119243e-05,
"loss": 0.2017,
"step": 42000
},
{
"epoch": 0.9,
"learning_rate": 1.401278595962254e-05,
"loss": 0.209,
"step": 42100
},
{
"epoch": 0.9,
"learning_rate": 1.3998563535125833e-05,
"loss": 0.2065,
"step": 42200
},
{
"epoch": 0.9,
"learning_rate": 1.398434111062913e-05,
"loss": 0.2024,
"step": 42300
},
{
"epoch": 0.9,
"learning_rate": 1.3970118686132425e-05,
"loss": 0.1976,
"step": 42400
},
{
"epoch": 0.91,
"learning_rate": 1.3955896261635721e-05,
"loss": 0.2041,
"step": 42500
},
{
"epoch": 0.91,
"learning_rate": 1.3941673837139018e-05,
"loss": 0.1998,
"step": 42600
},
{
"epoch": 0.91,
"learning_rate": 1.3927451412642315e-05,
"loss": 0.2026,
"step": 42700
},
{
"epoch": 0.91,
"learning_rate": 1.391322898814561e-05,
"loss": 0.2031,
"step": 42800
},
{
"epoch": 0.92,
"learning_rate": 1.3899006563648906e-05,
"loss": 0.2043,
"step": 42900
},
{
"epoch": 0.92,
"learning_rate": 1.3884784139152203e-05,
"loss": 0.2098,
"step": 43000
},
{
"epoch": 0.92,
"learning_rate": 1.38705617146555e-05,
"loss": 0.206,
"step": 43100
},
{
"epoch": 0.92,
"learning_rate": 1.3856339290158793e-05,
"loss": 0.1995,
"step": 43200
},
{
"epoch": 0.92,
"learning_rate": 1.384211686566209e-05,
"loss": 0.2029,
"step": 43300
},
{
"epoch": 0.93,
"learning_rate": 1.3827894441165386e-05,
"loss": 0.2054,
"step": 43400
},
{
"epoch": 0.93,
"learning_rate": 1.3813672016668681e-05,
"loss": 0.2027,
"step": 43500
},
{
"epoch": 0.93,
"learning_rate": 1.3799449592171978e-05,
"loss": 0.196,
"step": 43600
},
{
"epoch": 0.93,
"learning_rate": 1.3785227167675275e-05,
"loss": 0.1984,
"step": 43700
},
{
"epoch": 0.93,
"learning_rate": 1.3771004743178571e-05,
"loss": 0.2033,
"step": 43800
},
{
"epoch": 0.94,
"learning_rate": 1.3756782318681866e-05,
"loss": 0.2019,
"step": 43900
},
{
"epoch": 0.94,
"learning_rate": 1.3742559894185163e-05,
"loss": 0.1965,
"step": 44000
},
{
"epoch": 0.94,
"learning_rate": 1.372833746968846e-05,
"loss": 0.1878,
"step": 44100
},
{
"epoch": 0.94,
"learning_rate": 1.3714115045191756e-05,
"loss": 0.2072,
"step": 44200
},
{
"epoch": 0.95,
"learning_rate": 1.369989262069505e-05,
"loss": 0.204,
"step": 44300
},
{
"epoch": 0.95,
"learning_rate": 1.3685670196198346e-05,
"loss": 0.1969,
"step": 44400
},
{
"epoch": 0.95,
"learning_rate": 1.3671447771701643e-05,
"loss": 0.1908,
"step": 44500
},
{
"epoch": 0.95,
"learning_rate": 1.3657225347204938e-05,
"loss": 0.1937,
"step": 44600
},
{
"epoch": 0.95,
"learning_rate": 1.3643002922708235e-05,
"loss": 0.1987,
"step": 44700
},
{
"epoch": 0.96,
"learning_rate": 1.3628780498211531e-05,
"loss": 0.1921,
"step": 44800
},
{
"epoch": 0.96,
"learning_rate": 1.3614558073714828e-05,
"loss": 0.1967,
"step": 44900
},
{
"epoch": 0.96,
"learning_rate": 1.3600335649218125e-05,
"loss": 0.1963,
"step": 45000
},
{
"epoch": 0.96,
"eval_loss": 0.21258682012557983,
"eval_runtime": 34.9419,
"eval_samples_per_second": 143.095,
"eval_steps_per_second": 1.145,
"step": 45000
},
{
"epoch": 0.96,
"learning_rate": 1.358611322472142e-05,
"loss": 0.1905,
"step": 45100
},
{
"epoch": 0.96,
"learning_rate": 1.3571890800224716e-05,
"loss": 0.1997,
"step": 45200
},
{
"epoch": 0.97,
"learning_rate": 1.3557668375728013e-05,
"loss": 0.192,
"step": 45300
},
{
"epoch": 0.97,
"learning_rate": 1.3543445951231306e-05,
"loss": 0.1973,
"step": 45400
},
{
"epoch": 0.97,
"learning_rate": 1.3529223526734603e-05,
"loss": 0.1883,
"step": 45500
},
{
"epoch": 0.97,
"learning_rate": 1.35150011022379e-05,
"loss": 0.2007,
"step": 45600
},
{
"epoch": 0.97,
"learning_rate": 1.3500778677741195e-05,
"loss": 0.1926,
"step": 45700
},
{
"epoch": 0.98,
"learning_rate": 1.3486556253244491e-05,
"loss": 0.194,
"step": 45800
},
{
"epoch": 0.98,
"learning_rate": 1.3472333828747788e-05,
"loss": 0.1946,
"step": 45900
},
{
"epoch": 0.98,
"learning_rate": 1.3458111404251085e-05,
"loss": 0.186,
"step": 46000
},
{
"epoch": 0.98,
"learning_rate": 1.3443888979754381e-05,
"loss": 0.1922,
"step": 46100
},
{
"epoch": 0.99,
"learning_rate": 1.3429666555257676e-05,
"loss": 0.1912,
"step": 46200
},
{
"epoch": 0.99,
"learning_rate": 1.3415444130760973e-05,
"loss": 0.1919,
"step": 46300
},
{
"epoch": 0.99,
"learning_rate": 1.3401221706264266e-05,
"loss": 0.1975,
"step": 46400
},
{
"epoch": 0.99,
"learning_rate": 1.3386999281767563e-05,
"loss": 0.1902,
"step": 46500
},
{
"epoch": 0.99,
"learning_rate": 1.337277685727086e-05,
"loss": 0.1858,
"step": 46600
},
{
"epoch": 1.0,
"learning_rate": 1.3358554432774156e-05,
"loss": 0.1969,
"step": 46700
},
{
"epoch": 1.0,
"learning_rate": 1.3344332008277451e-05,
"loss": 0.1741,
"step": 46800
},
{
"epoch": 1.0,
"learning_rate": 1.3330109583780748e-05,
"loss": 0.1724,
"step": 46900
},
{
"epoch": 1.0,
"learning_rate": 1.3315887159284044e-05,
"loss": 0.1066,
"step": 47000
},
{
"epoch": 1.0,
"learning_rate": 1.3301664734787341e-05,
"loss": 0.1048,
"step": 47100
},
{
"epoch": 1.01,
"learning_rate": 1.3287442310290638e-05,
"loss": 0.1075,
"step": 47200
},
{
"epoch": 1.01,
"learning_rate": 1.3273219885793933e-05,
"loss": 0.1024,
"step": 47300
},
{
"epoch": 1.01,
"learning_rate": 1.325899746129723e-05,
"loss": 0.1054,
"step": 47400
},
{
"epoch": 1.01,
"learning_rate": 1.3244775036800523e-05,
"loss": 0.1056,
"step": 47500
},
{
"epoch": 1.02,
"learning_rate": 1.323055261230382e-05,
"loss": 0.1004,
"step": 47600
},
{
"epoch": 1.02,
"learning_rate": 1.3216330187807116e-05,
"loss": 0.1074,
"step": 47700
},
{
"epoch": 1.02,
"learning_rate": 1.3202107763310413e-05,
"loss": 0.1028,
"step": 47800
},
{
"epoch": 1.02,
"learning_rate": 1.3187885338813708e-05,
"loss": 0.1092,
"step": 47900
},
{
"epoch": 1.02,
"learning_rate": 1.3173662914317004e-05,
"loss": 0.1033,
"step": 48000
},
{
"epoch": 1.03,
"learning_rate": 1.3159440489820301e-05,
"loss": 0.1025,
"step": 48100
},
{
"epoch": 1.03,
"learning_rate": 1.3145218065323598e-05,
"loss": 0.1017,
"step": 48200
},
{
"epoch": 1.03,
"learning_rate": 1.3130995640826894e-05,
"loss": 0.1091,
"step": 48300
},
{
"epoch": 1.03,
"learning_rate": 1.311677321633019e-05,
"loss": 0.1054,
"step": 48400
},
{
"epoch": 1.03,
"learning_rate": 1.3102550791833486e-05,
"loss": 0.1033,
"step": 48500
},
{
"epoch": 1.04,
"learning_rate": 1.308832836733678e-05,
"loss": 0.1082,
"step": 48600
},
{
"epoch": 1.04,
"learning_rate": 1.3074105942840076e-05,
"loss": 0.1045,
"step": 48700
},
{
"epoch": 1.04,
"learning_rate": 1.3059883518343373e-05,
"loss": 0.1083,
"step": 48800
},
{
"epoch": 1.04,
"learning_rate": 1.304566109384667e-05,
"loss": 0.1067,
"step": 48900
},
{
"epoch": 1.05,
"learning_rate": 1.3031438669349964e-05,
"loss": 0.1047,
"step": 49000
},
{
"epoch": 1.05,
"learning_rate": 1.3017216244853261e-05,
"loss": 0.1038,
"step": 49100
},
{
"epoch": 1.05,
"learning_rate": 1.3002993820356558e-05,
"loss": 0.1016,
"step": 49200
},
{
"epoch": 1.05,
"learning_rate": 1.2988771395859854e-05,
"loss": 0.1045,
"step": 49300
},
{
"epoch": 1.05,
"learning_rate": 1.2974548971363151e-05,
"loss": 0.1081,
"step": 49400
},
{
"epoch": 1.06,
"learning_rate": 1.2960326546866446e-05,
"loss": 0.111,
"step": 49500
},
{
"epoch": 1.06,
"learning_rate": 1.294610412236974e-05,
"loss": 0.1044,
"step": 49600
},
{
"epoch": 1.06,
"learning_rate": 1.2931881697873036e-05,
"loss": 0.1084,
"step": 49700
},
{
"epoch": 1.06,
"learning_rate": 1.2917659273376332e-05,
"loss": 0.106,
"step": 49800
},
{
"epoch": 1.06,
"learning_rate": 1.2903436848879629e-05,
"loss": 0.1094,
"step": 49900
},
{
"epoch": 1.07,
"learning_rate": 1.2889214424382926e-05,
"loss": 0.1154,
"step": 50000
},
{
"epoch": 1.07,
"eval_loss": 0.205108642578125,
"eval_runtime": 34.2834,
"eval_samples_per_second": 145.843,
"eval_steps_per_second": 1.167,
"step": 50000
},
{
"epoch": 1.07,
"learning_rate": 1.287499199988622e-05,
"loss": 0.0996,
"step": 50100
},
{
"epoch": 1.07,
"learning_rate": 1.2860769575389517e-05,
"loss": 0.1065,
"step": 50200
},
{
"epoch": 1.07,
"learning_rate": 1.2846547150892814e-05,
"loss": 0.1057,
"step": 50300
},
{
"epoch": 1.08,
"learning_rate": 1.283232472639611e-05,
"loss": 0.0993,
"step": 50400
},
{
"epoch": 1.08,
"learning_rate": 1.2818102301899407e-05,
"loss": 0.1095,
"step": 50500
},
{
"epoch": 1.08,
"learning_rate": 1.2803879877402702e-05,
"loss": 0.1098,
"step": 50600
},
{
"epoch": 1.08,
"learning_rate": 1.2789657452905997e-05,
"loss": 0.1148,
"step": 50700
},
{
"epoch": 1.08,
"learning_rate": 1.2775435028409292e-05,
"loss": 0.1088,
"step": 50800
},
{
"epoch": 1.09,
"learning_rate": 1.2761212603912589e-05,
"loss": 0.0996,
"step": 50900
},
{
"epoch": 1.09,
"learning_rate": 1.2746990179415886e-05,
"loss": 0.1093,
"step": 51000
},
{
"epoch": 1.09,
"learning_rate": 1.2732767754919182e-05,
"loss": 0.103,
"step": 51100
},
{
"epoch": 1.09,
"learning_rate": 1.2718545330422477e-05,
"loss": 0.1029,
"step": 51200
},
{
"epoch": 1.09,
"learning_rate": 1.2704322905925774e-05,
"loss": 0.1012,
"step": 51300
},
{
"epoch": 1.1,
"learning_rate": 1.269010048142907e-05,
"loss": 0.1077,
"step": 51400
},
{
"epoch": 1.1,
"learning_rate": 1.2675878056932367e-05,
"loss": 0.1115,
"step": 51500
},
{
"epoch": 1.1,
"learning_rate": 1.2661655632435664e-05,
"loss": 0.0979,
"step": 51600
},
{
"epoch": 1.1,
"learning_rate": 1.2647433207938959e-05,
"loss": 0.1038,
"step": 51700
},
{
"epoch": 1.11,
"learning_rate": 1.2633210783442254e-05,
"loss": 0.1101,
"step": 51800
},
{
"epoch": 1.11,
"learning_rate": 1.2618988358945549e-05,
"loss": 0.1114,
"step": 51900
},
{
"epoch": 1.11,
"learning_rate": 1.2604765934448846e-05,
"loss": 0.1026,
"step": 52000
},
{
"epoch": 1.11,
"learning_rate": 1.2590543509952142e-05,
"loss": 0.0995,
"step": 52100
},
{
"epoch": 1.11,
"learning_rate": 1.2576321085455439e-05,
"loss": 0.1126,
"step": 52200
},
{
"epoch": 1.12,
"learning_rate": 1.2562098660958734e-05,
"loss": 0.1111,
"step": 52300
},
{
"epoch": 1.12,
"learning_rate": 1.254787623646203e-05,
"loss": 0.1033,
"step": 52400
},
{
"epoch": 1.12,
"learning_rate": 1.2533653811965327e-05,
"loss": 0.1048,
"step": 52500
},
{
"epoch": 1.12,
"learning_rate": 1.2519431387468624e-05,
"loss": 0.1,
"step": 52600
},
{
"epoch": 1.12,
"learning_rate": 1.250520896297192e-05,
"loss": 0.1056,
"step": 52700
},
{
"epoch": 1.13,
"learning_rate": 1.2490986538475214e-05,
"loss": 0.1104,
"step": 52800
},
{
"epoch": 1.13,
"learning_rate": 1.247676411397851e-05,
"loss": 0.1109,
"step": 52900
},
{
"epoch": 1.13,
"learning_rate": 1.2462541689481806e-05,
"loss": 0.1053,
"step": 53000
},
{
"epoch": 1.13,
"learning_rate": 1.2448319264985102e-05,
"loss": 0.0941,
"step": 53100
},
{
"epoch": 1.13,
"learning_rate": 1.2434096840488399e-05,
"loss": 0.1011,
"step": 53200
},
{
"epoch": 1.14,
"learning_rate": 1.2419874415991696e-05,
"loss": 0.1049,
"step": 53300
},
{
"epoch": 1.14,
"learning_rate": 1.2405651991494992e-05,
"loss": 0.106,
"step": 53400
},
{
"epoch": 1.14,
"learning_rate": 1.2391429566998287e-05,
"loss": 0.1047,
"step": 53500
},
{
"epoch": 1.14,
"learning_rate": 1.2377207142501584e-05,
"loss": 0.1015,
"step": 53600
},
{
"epoch": 1.15,
"learning_rate": 1.236298471800488e-05,
"loss": 0.1021,
"step": 53700
},
{
"epoch": 1.15,
"learning_rate": 1.2348762293508177e-05,
"loss": 0.1016,
"step": 53800
},
{
"epoch": 1.15,
"learning_rate": 1.233453986901147e-05,
"loss": 0.0967,
"step": 53900
},
{
"epoch": 1.15,
"learning_rate": 1.2320317444514767e-05,
"loss": 0.1061,
"step": 54000
},
{
"epoch": 1.15,
"learning_rate": 1.2306095020018062e-05,
"loss": 0.0994,
"step": 54100
},
{
"epoch": 1.16,
"learning_rate": 1.2291872595521359e-05,
"loss": 0.0998,
"step": 54200
},
{
"epoch": 1.16,
"learning_rate": 1.2277650171024655e-05,
"loss": 0.1068,
"step": 54300
},
{
"epoch": 1.16,
"learning_rate": 1.2263427746527952e-05,
"loss": 0.1015,
"step": 54400
},
{
"epoch": 1.16,
"learning_rate": 1.2249205322031249e-05,
"loss": 0.1051,
"step": 54500
},
{
"epoch": 1.16,
"learning_rate": 1.2234982897534544e-05,
"loss": 0.1101,
"step": 54600
},
{
"epoch": 1.17,
"learning_rate": 1.222076047303784e-05,
"loss": 0.1021,
"step": 54700
},
{
"epoch": 1.17,
"learning_rate": 1.2206538048541137e-05,
"loss": 0.1047,
"step": 54800
},
{
"epoch": 1.17,
"learning_rate": 1.2192315624044434e-05,
"loss": 0.0966,
"step": 54900
},
{
"epoch": 1.17,
"learning_rate": 1.2178093199547727e-05,
"loss": 0.1028,
"step": 55000
},
{
"epoch": 1.17,
"eval_loss": 0.19255822896957397,
"eval_runtime": 34.1404,
"eval_samples_per_second": 146.454,
"eval_steps_per_second": 1.172,
"step": 55000
},
{
"epoch": 1.18,
"learning_rate": 1.2163870775051024e-05,
"loss": 0.1049,
"step": 55100
},
{
"epoch": 1.18,
"learning_rate": 1.2149648350554319e-05,
"loss": 0.0977,
"step": 55200
},
{
"epoch": 1.18,
"learning_rate": 1.2135425926057615e-05,
"loss": 0.1011,
"step": 55300
},
{
"epoch": 1.18,
"learning_rate": 1.2121203501560912e-05,
"loss": 0.1003,
"step": 55400
},
{
"epoch": 1.18,
"learning_rate": 1.2106981077064209e-05,
"loss": 0.1109,
"step": 55500
},
{
"epoch": 1.19,
"learning_rate": 1.2092758652567505e-05,
"loss": 0.1085,
"step": 55600
},
{
"epoch": 1.19,
"learning_rate": 1.20785362280708e-05,
"loss": 0.1081,
"step": 55700
},
{
"epoch": 1.19,
"learning_rate": 1.2064313803574097e-05,
"loss": 0.1012,
"step": 55800
},
{
"epoch": 1.19,
"learning_rate": 1.2050091379077394e-05,
"loss": 0.1019,
"step": 55900
},
{
"epoch": 1.19,
"learning_rate": 1.2035868954580687e-05,
"loss": 0.1036,
"step": 56000
},
{
"epoch": 1.2,
"learning_rate": 1.2021646530083984e-05,
"loss": 0.1055,
"step": 56100
},
{
"epoch": 1.2,
"learning_rate": 1.200742410558728e-05,
"loss": 0.1105,
"step": 56200
},
{
"epoch": 1.2,
"learning_rate": 1.1993201681090575e-05,
"loss": 0.1006,
"step": 56300
},
{
"epoch": 1.2,
"learning_rate": 1.1978979256593872e-05,
"loss": 0.0995,
"step": 56400
},
{
"epoch": 1.21,
"learning_rate": 1.1964756832097169e-05,
"loss": 0.1044,
"step": 56500
},
{
"epoch": 1.21,
"learning_rate": 1.1950534407600465e-05,
"loss": 0.1021,
"step": 56600
},
{
"epoch": 1.21,
"learning_rate": 1.1936311983103762e-05,
"loss": 0.1049,
"step": 56700
},
{
"epoch": 1.21,
"learning_rate": 1.1922089558607057e-05,
"loss": 0.1102,
"step": 56800
},
{
"epoch": 1.21,
"learning_rate": 1.1907867134110354e-05,
"loss": 0.1017,
"step": 56900
},
{
"epoch": 1.22,
"learning_rate": 1.189364470961365e-05,
"loss": 0.0983,
"step": 57000
},
{
"epoch": 1.22,
"learning_rate": 1.1879422285116943e-05,
"loss": 0.1003,
"step": 57100
},
{
"epoch": 1.22,
"learning_rate": 1.186519986062024e-05,
"loss": 0.1,
"step": 57200
},
{
"epoch": 1.22,
"learning_rate": 1.1850977436123537e-05,
"loss": 0.109,
"step": 57300
},
{
"epoch": 1.22,
"learning_rate": 1.1836755011626832e-05,
"loss": 0.0958,
"step": 57400
},
{
"epoch": 1.23,
"learning_rate": 1.1822532587130128e-05,
"loss": 0.1057,
"step": 57500
},
{
"epoch": 1.23,
"learning_rate": 1.1808310162633425e-05,
"loss": 0.1067,
"step": 57600
},
{
"epoch": 1.23,
"learning_rate": 1.1794087738136722e-05,
"loss": 0.1108,
"step": 57700
},
{
"epoch": 1.23,
"learning_rate": 1.1779865313640018e-05,
"loss": 0.1089,
"step": 57800
},
{
"epoch": 1.24,
"learning_rate": 1.1765642889143313e-05,
"loss": 0.1035,
"step": 57900
},
{
"epoch": 1.24,
"learning_rate": 1.175142046464661e-05,
"loss": 0.1025,
"step": 58000
},
{
"epoch": 1.24,
"learning_rate": 1.1737198040149907e-05,
"loss": 0.1002,
"step": 58100
},
{
"epoch": 1.24,
"learning_rate": 1.17229756156532e-05,
"loss": 0.1018,
"step": 58200
},
{
"epoch": 1.24,
"learning_rate": 1.1708753191156497e-05,
"loss": 0.108,
"step": 58300
},
{
"epoch": 1.25,
"learning_rate": 1.1694530766659793e-05,
"loss": 0.0987,
"step": 58400
},
{
"epoch": 1.25,
"learning_rate": 1.1680308342163088e-05,
"loss": 0.1099,
"step": 58500
},
{
"epoch": 1.25,
"learning_rate": 1.1666085917666385e-05,
"loss": 0.1024,
"step": 58600
},
{
"epoch": 1.25,
"learning_rate": 1.1651863493169682e-05,
"loss": 0.0957,
"step": 58700
},
{
"epoch": 1.25,
"learning_rate": 1.1637641068672978e-05,
"loss": 0.1044,
"step": 58800
},
{
"epoch": 1.26,
"learning_rate": 1.1623418644176275e-05,
"loss": 0.1007,
"step": 58900
},
{
"epoch": 1.26,
"learning_rate": 1.160919621967957e-05,
"loss": 0.1042,
"step": 59000
},
{
"epoch": 1.26,
"learning_rate": 1.1594973795182867e-05,
"loss": 0.1059,
"step": 59100
},
{
"epoch": 1.26,
"learning_rate": 1.158075137068616e-05,
"loss": 0.1014,
"step": 59200
},
{
"epoch": 1.27,
"learning_rate": 1.1566528946189457e-05,
"loss": 0.1,
"step": 59300
},
{
"epoch": 1.27,
"learning_rate": 1.1552306521692753e-05,
"loss": 0.0947,
"step": 59400
},
{
"epoch": 1.27,
"learning_rate": 1.153808409719605e-05,
"loss": 0.1025,
"step": 59500
},
{
"epoch": 1.27,
"learning_rate": 1.1523861672699345e-05,
"loss": 0.1032,
"step": 59600
},
{
"epoch": 1.27,
"learning_rate": 1.1509639248202642e-05,
"loss": 0.1048,
"step": 59700
},
{
"epoch": 1.28,
"learning_rate": 1.1495416823705938e-05,
"loss": 0.0976,
"step": 59800
},
{
"epoch": 1.28,
"learning_rate": 1.1481194399209235e-05,
"loss": 0.1036,
"step": 59900
},
{
"epoch": 1.28,
"learning_rate": 1.1466971974712532e-05,
"loss": 0.1097,
"step": 60000
},
{
"epoch": 1.28,
"eval_loss": 0.18599912524223328,
"eval_runtime": 34.2806,
"eval_samples_per_second": 145.855,
"eval_steps_per_second": 1.167,
"step": 60000
},
{
"epoch": 1.28,
"learning_rate": 1.1452749550215827e-05,
"loss": 0.1038,
"step": 60100
},
{
"epoch": 1.28,
"learning_rate": 1.1438527125719123e-05,
"loss": 0.1013,
"step": 60200
},
{
"epoch": 1.29,
"learning_rate": 1.1424304701222416e-05,
"loss": 0.098,
"step": 60300
},
{
"epoch": 1.29,
"learning_rate": 1.1410082276725713e-05,
"loss": 0.1002,
"step": 60400
},
{
"epoch": 1.29,
"learning_rate": 1.139585985222901e-05,
"loss": 0.105,
"step": 60500
},
{
"epoch": 1.29,
"learning_rate": 1.1381637427732307e-05,
"loss": 0.1002,
"step": 60600
},
{
"epoch": 1.29,
"learning_rate": 1.1367415003235601e-05,
"loss": 0.0973,
"step": 60700
},
{
"epoch": 1.3,
"learning_rate": 1.1353192578738898e-05,
"loss": 0.1038,
"step": 60800
},
{
"epoch": 1.3,
"learning_rate": 1.1338970154242195e-05,
"loss": 0.0989,
"step": 60900
},
{
"epoch": 1.3,
"learning_rate": 1.1324747729745491e-05,
"loss": 0.1096,
"step": 61000
},
{
"epoch": 1.3,
"learning_rate": 1.1310525305248788e-05,
"loss": 0.0869,
"step": 61100
},
{
"epoch": 1.31,
"learning_rate": 1.1296302880752083e-05,
"loss": 0.1003,
"step": 61200
},
{
"epoch": 1.31,
"learning_rate": 1.128208045625538e-05,
"loss": 0.0945,
"step": 61300
},
{
"epoch": 1.31,
"learning_rate": 1.1267858031758673e-05,
"loss": 0.1004,
"step": 61400
},
{
"epoch": 1.31,
"learning_rate": 1.125363560726197e-05,
"loss": 0.0984,
"step": 61500
},
{
"epoch": 1.31,
"learning_rate": 1.1239413182765266e-05,
"loss": 0.098,
"step": 61600
},
{
"epoch": 1.32,
"learning_rate": 1.1225190758268563e-05,
"loss": 0.0982,
"step": 61700
},
{
"epoch": 1.32,
"learning_rate": 1.121096833377186e-05,
"loss": 0.0988,
"step": 61800
},
{
"epoch": 1.32,
"learning_rate": 1.1196745909275155e-05,
"loss": 0.0999,
"step": 61900
},
{
"epoch": 1.32,
"learning_rate": 1.1182523484778451e-05,
"loss": 0.1028,
"step": 62000
},
{
"epoch": 1.32,
"learning_rate": 1.1168301060281748e-05,
"loss": 0.0966,
"step": 62100
},
{
"epoch": 1.33,
"learning_rate": 1.1154078635785045e-05,
"loss": 0.0881,
"step": 62200
},
{
"epoch": 1.33,
"learning_rate": 1.113985621128834e-05,
"loss": 0.1008,
"step": 62300
},
{
"epoch": 1.33,
"learning_rate": 1.1125633786791635e-05,
"loss": 0.1029,
"step": 62400
},
{
"epoch": 1.33,
"learning_rate": 1.111141136229493e-05,
"loss": 0.0981,
"step": 62500
},
{
"epoch": 1.34,
"learning_rate": 1.1097188937798226e-05,
"loss": 0.093,
"step": 62600
},
{
"epoch": 1.34,
"learning_rate": 1.1082966513301523e-05,
"loss": 0.1009,
"step": 62700
},
{
"epoch": 1.34,
"learning_rate": 1.106874408880482e-05,
"loss": 0.0988,
"step": 62800
},
{
"epoch": 1.34,
"learning_rate": 1.1054521664308116e-05,
"loss": 0.0947,
"step": 62900
},
{
"epoch": 1.34,
"learning_rate": 1.1040299239811411e-05,
"loss": 0.1021,
"step": 63000
},
{
"epoch": 1.35,
"learning_rate": 1.1026076815314708e-05,
"loss": 0.0948,
"step": 63100
},
{
"epoch": 1.35,
"learning_rate": 1.1011854390818005e-05,
"loss": 0.0934,
"step": 63200
},
{
"epoch": 1.35,
"learning_rate": 1.0997631966321301e-05,
"loss": 0.0919,
"step": 63300
},
{
"epoch": 1.35,
"learning_rate": 1.0983409541824596e-05,
"loss": 0.0959,
"step": 63400
},
{
"epoch": 1.35,
"learning_rate": 1.0969187117327891e-05,
"loss": 0.0964,
"step": 63500
},
{
"epoch": 1.36,
"learning_rate": 1.0954964692831186e-05,
"loss": 0.1008,
"step": 63600
},
{
"epoch": 1.36,
"learning_rate": 1.0940742268334483e-05,
"loss": 0.1005,
"step": 63700
},
{
"epoch": 1.36,
"learning_rate": 1.092651984383778e-05,
"loss": 0.0891,
"step": 63800
},
{
"epoch": 1.36,
"learning_rate": 1.0912297419341076e-05,
"loss": 0.0962,
"step": 63900
},
{
"epoch": 1.37,
"learning_rate": 1.0898074994844373e-05,
"loss": 0.0891,
"step": 64000
},
{
"epoch": 1.37,
"learning_rate": 1.0883852570347668e-05,
"loss": 0.0946,
"step": 64100
},
{
"epoch": 1.37,
"learning_rate": 1.0869630145850965e-05,
"loss": 0.0977,
"step": 64200
},
{
"epoch": 1.37,
"learning_rate": 1.0855407721354261e-05,
"loss": 0.0999,
"step": 64300
},
{
"epoch": 1.37,
"learning_rate": 1.0841185296857558e-05,
"loss": 0.1032,
"step": 64400
},
{
"epoch": 1.38,
"learning_rate": 1.0826962872360853e-05,
"loss": 0.087,
"step": 64500
},
{
"epoch": 1.38,
"learning_rate": 1.0812740447864148e-05,
"loss": 0.0976,
"step": 64600
},
{
"epoch": 1.38,
"learning_rate": 1.0798518023367443e-05,
"loss": 0.1002,
"step": 64700
},
{
"epoch": 1.38,
"learning_rate": 1.078429559887074e-05,
"loss": 0.0991,
"step": 64800
},
{
"epoch": 1.38,
"learning_rate": 1.0770073174374036e-05,
"loss": 0.0928,
"step": 64900
},
{
"epoch": 1.39,
"learning_rate": 1.0755850749877333e-05,
"loss": 0.0966,
"step": 65000
},
{
"epoch": 1.39,
"learning_rate": 1.074162832538063e-05,
"loss": 0.0916,
"step": 65100
},
{
"epoch": 1.39,
"learning_rate": 1.0727405900883924e-05,
"loss": 0.0984,
"step": 65200
},
{
"epoch": 1.39,
"learning_rate": 1.0713183476387221e-05,
"loss": 0.1021,
"step": 65300
},
{
"epoch": 1.4,
"learning_rate": 1.0698961051890518e-05,
"loss": 0.092,
"step": 65400
},
{
"epoch": 1.4,
"learning_rate": 1.0684738627393814e-05,
"loss": 0.0997,
"step": 65500
},
{
"epoch": 1.4,
"learning_rate": 1.0670516202897108e-05,
"loss": 0.0862,
"step": 65600
},
{
"epoch": 1.4,
"learning_rate": 1.0656293778400404e-05,
"loss": 0.091,
"step": 65700
},
{
"epoch": 1.4,
"learning_rate": 1.06420713539037e-05,
"loss": 0.0945,
"step": 65800
},
{
"epoch": 1.41,
"learning_rate": 1.0627848929406996e-05,
"loss": 0.089,
"step": 65900
},
{
"epoch": 1.41,
"learning_rate": 1.0613626504910293e-05,
"loss": 0.0943,
"step": 66000
},
{
"epoch": 1.41,
"learning_rate": 1.059940408041359e-05,
"loss": 0.0919,
"step": 66100
},
{
"epoch": 1.41,
"learning_rate": 1.0585181655916886e-05,
"loss": 0.0947,
"step": 66200
},
{
"epoch": 1.41,
"learning_rate": 1.0570959231420181e-05,
"loss": 0.0915,
"step": 66300
},
{
"epoch": 1.42,
"learning_rate": 1.0556736806923478e-05,
"loss": 0.1016,
"step": 66400
},
{
"epoch": 1.42,
"learning_rate": 1.0542514382426774e-05,
"loss": 0.0953,
"step": 66500
},
{
"epoch": 1.42,
"learning_rate": 1.0528291957930071e-05,
"loss": 0.0953,
"step": 66600
},
{
"epoch": 1.42,
"learning_rate": 1.0514069533433364e-05,
"loss": 0.0943,
"step": 66700
},
{
"epoch": 1.43,
"learning_rate": 1.0499847108936661e-05,
"loss": 0.0983,
"step": 66800
},
{
"epoch": 1.43,
"learning_rate": 1.0485624684439956e-05,
"loss": 0.0929,
"step": 66900
},
{
"epoch": 1.43,
"learning_rate": 1.0471402259943253e-05,
"loss": 0.0983,
"step": 67000
},
{
"epoch": 1.43,
"learning_rate": 1.045717983544655e-05,
"loss": 0.0928,
"step": 67100
},
{
"epoch": 1.43,
"learning_rate": 1.0442957410949846e-05,
"loss": 0.0887,
"step": 67200
},
{
"epoch": 1.44,
"learning_rate": 1.0428734986453143e-05,
"loss": 0.0927,
"step": 67300
},
{
"epoch": 1.44,
"learning_rate": 1.0414512561956438e-05,
"loss": 0.0957,
"step": 67400
},
{
"epoch": 1.44,
"learning_rate": 1.0400290137459734e-05,
"loss": 0.0905,
"step": 67500
},
{
"epoch": 1.44,
"learning_rate": 1.0386067712963031e-05,
"loss": 0.0943,
"step": 67600
},
{
"epoch": 1.44,
"learning_rate": 1.0371845288466328e-05,
"loss": 0.093,
"step": 67700
},
{
"epoch": 1.45,
"learning_rate": 1.035762286396962e-05,
"loss": 0.0922,
"step": 67800
},
{
"epoch": 1.45,
"learning_rate": 1.0343400439472917e-05,
"loss": 0.095,
"step": 67900
},
{
"epoch": 1.45,
"learning_rate": 1.0329178014976212e-05,
"loss": 0.0904,
"step": 68000
},
{
"epoch": 1.45,
"learning_rate": 1.0314955590479509e-05,
"loss": 0.0981,
"step": 68100
},
{
"epoch": 1.45,
"learning_rate": 1.0300733165982806e-05,
"loss": 0.0973,
"step": 68200
},
{
"epoch": 1.46,
"learning_rate": 1.0286510741486102e-05,
"loss": 0.0891,
"step": 68300
},
{
"epoch": 1.46,
"learning_rate": 1.0272288316989399e-05,
"loss": 0.0942,
"step": 68400
},
{
"epoch": 1.46,
"learning_rate": 1.0258065892492694e-05,
"loss": 0.0869,
"step": 68500
},
{
"epoch": 1.46,
"learning_rate": 1.024384346799599e-05,
"loss": 0.1023,
"step": 68600
},
{
"epoch": 1.47,
"learning_rate": 1.0229621043499287e-05,
"loss": 0.1025,
"step": 68700
},
{
"epoch": 1.47,
"learning_rate": 1.021539861900258e-05,
"loss": 0.0878,
"step": 68800
},
{
"epoch": 1.47,
"learning_rate": 1.0201176194505877e-05,
"loss": 0.0921,
"step": 68900
},
{
"epoch": 1.47,
"learning_rate": 1.0186953770009174e-05,
"loss": 0.09,
"step": 69000
},
{
"epoch": 1.47,
"learning_rate": 1.0172731345512469e-05,
"loss": 0.0925,
"step": 69100
},
{
"epoch": 1.48,
"learning_rate": 1.0158508921015766e-05,
"loss": 0.0873,
"step": 69200
},
{
"epoch": 1.48,
"learning_rate": 1.0144286496519062e-05,
"loss": 0.089,
"step": 69300
},
{
"epoch": 1.48,
"learning_rate": 1.0130064072022359e-05,
"loss": 0.092,
"step": 69400
},
{
"epoch": 1.48,
"learning_rate": 1.0115841647525656e-05,
"loss": 0.0985,
"step": 69500
},
{
"epoch": 1.48,
"learning_rate": 1.010161922302895e-05,
"loss": 0.0895,
"step": 69600
},
{
"epoch": 1.49,
"learning_rate": 1.0087396798532247e-05,
"loss": 0.0845,
"step": 69700
},
{
"epoch": 1.49,
"learning_rate": 1.0073174374035544e-05,
"loss": 0.0905,
"step": 69800
},
{
"epoch": 1.49,
"learning_rate": 1.0058951949538837e-05,
"loss": 0.0901,
"step": 69900
},
{
"epoch": 1.49,
"learning_rate": 1.0044729525042134e-05,
"loss": 0.0955,
"step": 70000
},
{
"epoch": 1.49,
"eval_loss": 0.16799671947956085,
"eval_runtime": 34.201,
"eval_samples_per_second": 146.194,
"eval_steps_per_second": 1.17,
"step": 70000
},
{
"epoch": 1.5,
"learning_rate": 1.003050710054543e-05,
"loss": 0.0864,
"step": 70100
},
{
"epoch": 1.5,
"learning_rate": 1.0016284676048727e-05,
"loss": 0.0888,
"step": 70200
},
{
"epoch": 1.5,
"learning_rate": 1.0002062251552022e-05,
"loss": 0.0898,
"step": 70300
},
{
"epoch": 1.5,
"learning_rate": 9.987839827055319e-06,
"loss": 0.0887,
"step": 70400
},
{
"epoch": 1.5,
"learning_rate": 9.973617402558616e-06,
"loss": 0.0928,
"step": 70500
},
{
"epoch": 1.51,
"learning_rate": 9.959394978061912e-06,
"loss": 0.0897,
"step": 70600
},
{
"epoch": 1.51,
"learning_rate": 9.945172553565207e-06,
"loss": 0.0889,
"step": 70700
},
{
"epoch": 1.51,
"learning_rate": 9.930950129068502e-06,
"loss": 0.0922,
"step": 70800
},
{
"epoch": 1.51,
"learning_rate": 9.916727704571799e-06,
"loss": 0.0969,
"step": 70900
},
{
"epoch": 1.51,
"learning_rate": 9.902505280075096e-06,
"loss": 0.0905,
"step": 71000
},
{
"epoch": 1.52,
"learning_rate": 9.888282855578392e-06,
"loss": 0.0882,
"step": 71100
},
{
"epoch": 1.52,
"learning_rate": 9.874060431081687e-06,
"loss": 0.0953,
"step": 71200
},
{
"epoch": 1.52,
"learning_rate": 9.859838006584984e-06,
"loss": 0.0907,
"step": 71300
},
{
"epoch": 1.52,
"learning_rate": 9.845615582088279e-06,
"loss": 0.0932,
"step": 71400
},
{
"epoch": 1.53,
"learning_rate": 9.831393157591576e-06,
"loss": 0.0937,
"step": 71500
},
{
"epoch": 1.53,
"learning_rate": 9.817170733094872e-06,
"loss": 0.0932,
"step": 71600
},
{
"epoch": 1.53,
"learning_rate": 9.802948308598169e-06,
"loss": 0.0891,
"step": 71700
},
{
"epoch": 1.53,
"learning_rate": 9.788725884101464e-06,
"loss": 0.0924,
"step": 71800
},
{
"epoch": 1.53,
"learning_rate": 9.774503459604759e-06,
"loss": 0.0882,
"step": 71900
},
{
"epoch": 1.54,
"learning_rate": 9.760281035108055e-06,
"loss": 0.0901,
"step": 72000
},
{
"epoch": 1.54,
"learning_rate": 9.746058610611352e-06,
"loss": 0.0899,
"step": 72100
},
{
"epoch": 1.54,
"learning_rate": 9.731836186114649e-06,
"loss": 0.0897,
"step": 72200
},
{
"epoch": 1.54,
"learning_rate": 9.717613761617944e-06,
"loss": 0.0957,
"step": 72300
},
{
"epoch": 1.54,
"learning_rate": 9.70339133712124e-06,
"loss": 0.0909,
"step": 72400
},
{
"epoch": 1.55,
"learning_rate": 9.689168912624535e-06,
"loss": 0.0907,
"step": 72500
},
{
"epoch": 1.55,
"learning_rate": 9.674946488127832e-06,
"loss": 0.0873,
"step": 72600
},
{
"epoch": 1.55,
"learning_rate": 9.660724063631129e-06,
"loss": 0.093,
"step": 72700
},
{
"epoch": 1.55,
"learning_rate": 9.646501639134424e-06,
"loss": 0.0951,
"step": 72800
},
{
"epoch": 1.56,
"learning_rate": 9.63227921463772e-06,
"loss": 0.0905,
"step": 72900
},
{
"epoch": 1.56,
"learning_rate": 9.618056790141015e-06,
"loss": 0.0884,
"step": 73000
},
{
"epoch": 1.56,
"learning_rate": 9.603834365644312e-06,
"loss": 0.0898,
"step": 73100
},
{
"epoch": 1.56,
"learning_rate": 9.589611941147609e-06,
"loss": 0.0863,
"step": 73200
},
{
"epoch": 1.56,
"learning_rate": 9.575389516650905e-06,
"loss": 0.0922,
"step": 73300
},
{
"epoch": 1.57,
"learning_rate": 9.5611670921542e-06,
"loss": 0.0836,
"step": 73400
},
{
"epoch": 1.57,
"learning_rate": 9.546944667657497e-06,
"loss": 0.0892,
"step": 73500
},
{
"epoch": 1.57,
"learning_rate": 9.532722243160792e-06,
"loss": 0.0907,
"step": 73600
},
{
"epoch": 1.57,
"learning_rate": 9.518499818664089e-06,
"loss": 0.0852,
"step": 73700
},
{
"epoch": 1.57,
"learning_rate": 9.504277394167385e-06,
"loss": 0.0877,
"step": 73800
},
{
"epoch": 1.58,
"learning_rate": 9.49005496967068e-06,
"loss": 0.0912,
"step": 73900
},
{
"epoch": 1.58,
"learning_rate": 9.475832545173977e-06,
"loss": 0.0874,
"step": 74000
},
{
"epoch": 1.58,
"learning_rate": 9.461610120677272e-06,
"loss": 0.0844,
"step": 74100
},
{
"epoch": 1.58,
"learning_rate": 9.447387696180569e-06,
"loss": 0.084,
"step": 74200
},
{
"epoch": 1.59,
"learning_rate": 9.433165271683865e-06,
"loss": 0.0834,
"step": 74300
},
{
"epoch": 1.59,
"learning_rate": 9.41894284718716e-06,
"loss": 0.0897,
"step": 74400
},
{
"epoch": 1.59,
"learning_rate": 9.404720422690457e-06,
"loss": 0.0994,
"step": 74500
},
{
"epoch": 1.59,
"learning_rate": 9.390497998193754e-06,
"loss": 0.0936,
"step": 74600
},
{
"epoch": 1.59,
"learning_rate": 9.376275573697049e-06,
"loss": 0.0931,
"step": 74700
},
{
"epoch": 1.6,
"learning_rate": 9.362053149200345e-06,
"loss": 0.0835,
"step": 74800
},
{
"epoch": 1.6,
"learning_rate": 9.347830724703642e-06,
"loss": 0.0933,
"step": 74900
},
{
"epoch": 1.6,
"learning_rate": 9.333608300206937e-06,
"loss": 0.0936,
"step": 75000
},
{
"epoch": 1.6,
"learning_rate": 9.319385875710234e-06,
"loss": 0.0892,
"step": 75100
},
{
"epoch": 1.6,
"learning_rate": 9.305163451213528e-06,
"loss": 0.0857,
"step": 75200
},
{
"epoch": 1.61,
"learning_rate": 9.290941026716825e-06,
"loss": 0.0857,
"step": 75300
},
{
"epoch": 1.61,
"learning_rate": 9.276718602220122e-06,
"loss": 0.0806,
"step": 75400
},
{
"epoch": 1.61,
"learning_rate": 9.262496177723417e-06,
"loss": 0.0908,
"step": 75500
},
{
"epoch": 1.61,
"learning_rate": 9.248273753226713e-06,
"loss": 0.0869,
"step": 75600
},
{
"epoch": 1.61,
"learning_rate": 9.23405132873001e-06,
"loss": 0.0747,
"step": 75700
},
{
"epoch": 1.62,
"learning_rate": 9.219828904233305e-06,
"loss": 0.0844,
"step": 75800
},
{
"epoch": 1.62,
"learning_rate": 9.205606479736602e-06,
"loss": 0.0815,
"step": 75900
},
{
"epoch": 1.62,
"learning_rate": 9.191384055239897e-06,
"loss": 0.0819,
"step": 76000
},
{
"epoch": 1.62,
"learning_rate": 9.177161630743193e-06,
"loss": 0.0849,
"step": 76100
},
{
"epoch": 1.63,
"learning_rate": 9.16293920624649e-06,
"loss": 0.0864,
"step": 76200
},
{
"epoch": 1.63,
"learning_rate": 9.148716781749785e-06,
"loss": 0.0922,
"step": 76300
},
{
"epoch": 1.63,
"learning_rate": 9.134494357253082e-06,
"loss": 0.0853,
"step": 76400
},
{
"epoch": 1.63,
"learning_rate": 9.120271932756378e-06,
"loss": 0.0849,
"step": 76500
},
{
"epoch": 1.63,
"learning_rate": 9.106049508259673e-06,
"loss": 0.0857,
"step": 76600
},
{
"epoch": 1.64,
"learning_rate": 9.09182708376297e-06,
"loss": 0.0821,
"step": 76700
},
{
"epoch": 1.64,
"learning_rate": 9.077604659266267e-06,
"loss": 0.0887,
"step": 76800
},
{
"epoch": 1.64,
"learning_rate": 9.063382234769562e-06,
"loss": 0.0864,
"step": 76900
},
{
"epoch": 1.64,
"learning_rate": 9.049159810272858e-06,
"loss": 0.0858,
"step": 77000
},
{
"epoch": 1.64,
"learning_rate": 9.034937385776153e-06,
"loss": 0.0892,
"step": 77100
},
{
"epoch": 1.65,
"learning_rate": 9.02071496127945e-06,
"loss": 0.0804,
"step": 77200
},
{
"epoch": 1.65,
"learning_rate": 9.006492536782747e-06,
"loss": 0.0833,
"step": 77300
},
{
"epoch": 1.65,
"learning_rate": 8.992270112286042e-06,
"loss": 0.0843,
"step": 77400
},
{
"epoch": 1.65,
"learning_rate": 8.978047687789338e-06,
"loss": 0.0869,
"step": 77500
},
{
"epoch": 1.66,
"learning_rate": 8.963825263292633e-06,
"loss": 0.0907,
"step": 77600
},
{
"epoch": 1.66,
"learning_rate": 8.94960283879593e-06,
"loss": 0.0844,
"step": 77700
},
{
"epoch": 1.66,
"learning_rate": 8.935380414299227e-06,
"loss": 0.0795,
"step": 77800
},
{
"epoch": 1.66,
"learning_rate": 8.921157989802523e-06,
"loss": 0.0873,
"step": 77900
},
{
"epoch": 1.66,
"learning_rate": 8.906935565305818e-06,
"loss": 0.0829,
"step": 78000
},
{
"epoch": 1.67,
"learning_rate": 8.892713140809115e-06,
"loss": 0.0814,
"step": 78100
},
{
"epoch": 1.67,
"learning_rate": 8.87849071631241e-06,
"loss": 0.0844,
"step": 78200
},
{
"epoch": 1.67,
"learning_rate": 8.864268291815707e-06,
"loss": 0.0848,
"step": 78300
},
{
"epoch": 1.67,
"learning_rate": 8.850045867319003e-06,
"loss": 0.0911,
"step": 78400
},
{
"epoch": 1.67,
"learning_rate": 8.835823442822298e-06,
"loss": 0.0842,
"step": 78500
},
{
"epoch": 1.68,
"learning_rate": 8.821601018325595e-06,
"loss": 0.079,
"step": 78600
},
{
"epoch": 1.68,
"learning_rate": 8.80737859382889e-06,
"loss": 0.0835,
"step": 78700
},
{
"epoch": 1.68,
"learning_rate": 8.793156169332186e-06,
"loss": 0.0871,
"step": 78800
},
{
"epoch": 1.68,
"learning_rate": 8.778933744835483e-06,
"loss": 0.0809,
"step": 78900
},
{
"epoch": 1.69,
"learning_rate": 8.76471132033878e-06,
"loss": 0.0906,
"step": 79000
},
{
"epoch": 1.69,
"learning_rate": 8.750488895842075e-06,
"loss": 0.0836,
"step": 79100
},
{
"epoch": 1.69,
"learning_rate": 8.73626647134537e-06,
"loss": 0.0768,
"step": 79200
},
{
"epoch": 1.69,
"learning_rate": 8.722044046848666e-06,
"loss": 0.0844,
"step": 79300
},
{
"epoch": 1.69,
"learning_rate": 8.707821622351963e-06,
"loss": 0.0848,
"step": 79400
},
{
"epoch": 1.7,
"learning_rate": 8.69359919785526e-06,
"loss": 0.0862,
"step": 79500
},
{
"epoch": 1.7,
"learning_rate": 8.679376773358555e-06,
"loss": 0.0778,
"step": 79600
},
{
"epoch": 1.7,
"learning_rate": 8.665154348861851e-06,
"loss": 0.0813,
"step": 79700
},
{
"epoch": 1.7,
"learning_rate": 8.650931924365146e-06,
"loss": 0.0874,
"step": 79800
},
{
"epoch": 1.7,
"learning_rate": 8.636709499868443e-06,
"loss": 0.0772,
"step": 79900
},
{
"epoch": 1.71,
"learning_rate": 8.62248707537174e-06,
"loss": 0.0801,
"step": 80000
},
{
"epoch": 1.71,
"eval_loss": 0.14860820770263672,
"eval_runtime": 34.2128,
"eval_samples_per_second": 146.144,
"eval_steps_per_second": 1.169,
"step": 80000
},
{
"epoch": 1.71,
"learning_rate": 8.608264650875036e-06,
"loss": 0.087,
"step": 80100
},
{
"epoch": 1.71,
"learning_rate": 8.594042226378331e-06,
"loss": 0.0758,
"step": 80200
},
{
"epoch": 1.71,
"learning_rate": 8.579819801881626e-06,
"loss": 0.0855,
"step": 80300
},
{
"epoch": 1.72,
"learning_rate": 8.565597377384923e-06,
"loss": 0.0834,
"step": 80400
},
{
"epoch": 1.72,
"learning_rate": 8.55137495288822e-06,
"loss": 0.0846,
"step": 80500
},
{
"epoch": 1.72,
"learning_rate": 8.537152528391516e-06,
"loss": 0.079,
"step": 80600
},
{
"epoch": 1.72,
"learning_rate": 8.522930103894811e-06,
"loss": 0.0838,
"step": 80700
},
{
"epoch": 1.72,
"learning_rate": 8.508707679398108e-06,
"loss": 0.0868,
"step": 80800
},
{
"epoch": 1.73,
"learning_rate": 8.494485254901403e-06,
"loss": 0.0923,
"step": 80900
},
{
"epoch": 1.73,
"learning_rate": 8.4802628304047e-06,
"loss": 0.0851,
"step": 81000
},
{
"epoch": 1.73,
"learning_rate": 8.466040405907996e-06,
"loss": 0.0839,
"step": 81100
},
{
"epoch": 1.73,
"learning_rate": 8.451817981411293e-06,
"loss": 0.0832,
"step": 81200
},
{
"epoch": 1.73,
"learning_rate": 8.437595556914588e-06,
"loss": 0.0794,
"step": 81300
},
{
"epoch": 1.74,
"learning_rate": 8.423373132417883e-06,
"loss": 0.0772,
"step": 81400
},
{
"epoch": 1.74,
"learning_rate": 8.40915070792118e-06,
"loss": 0.0801,
"step": 81500
},
{
"epoch": 1.74,
"learning_rate": 8.394928283424476e-06,
"loss": 0.0806,
"step": 81600
},
{
"epoch": 1.74,
"learning_rate": 8.380705858927773e-06,
"loss": 0.0799,
"step": 81700
},
{
"epoch": 1.75,
"learning_rate": 8.366483434431068e-06,
"loss": 0.0823,
"step": 81800
},
{
"epoch": 1.75,
"learning_rate": 8.352261009934365e-06,
"loss": 0.0781,
"step": 81900
},
{
"epoch": 1.75,
"learning_rate": 8.33803858543766e-06,
"loss": 0.0872,
"step": 82000
},
{
"epoch": 1.75,
"learning_rate": 8.323816160940956e-06,
"loss": 0.0776,
"step": 82100
},
{
"epoch": 1.75,
"learning_rate": 8.309593736444253e-06,
"loss": 0.0801,
"step": 82200
},
{
"epoch": 1.76,
"learning_rate": 8.29537131194755e-06,
"loss": 0.0869,
"step": 82300
},
{
"epoch": 1.76,
"learning_rate": 8.281148887450845e-06,
"loss": 0.0837,
"step": 82400
},
{
"epoch": 1.76,
"learning_rate": 8.26692646295414e-06,
"loss": 0.0871,
"step": 82500
},
{
"epoch": 1.76,
"learning_rate": 8.252704038457436e-06,
"loss": 0.0787,
"step": 82600
},
{
"epoch": 1.76,
"learning_rate": 8.238481613960733e-06,
"loss": 0.0811,
"step": 82700
},
{
"epoch": 1.77,
"learning_rate": 8.22425918946403e-06,
"loss": 0.0779,
"step": 82800
},
{
"epoch": 1.77,
"learning_rate": 8.210036764967324e-06,
"loss": 0.0781,
"step": 82900
},
{
"epoch": 1.77,
"learning_rate": 8.195814340470621e-06,
"loss": 0.0797,
"step": 83000
},
{
"epoch": 1.77,
"learning_rate": 8.181591915973916e-06,
"loss": 0.0873,
"step": 83100
},
{
"epoch": 1.77,
"learning_rate": 8.167369491477213e-06,
"loss": 0.0769,
"step": 83200
},
{
"epoch": 1.78,
"learning_rate": 8.15314706698051e-06,
"loss": 0.0859,
"step": 83300
},
{
"epoch": 1.78,
"learning_rate": 8.138924642483806e-06,
"loss": 0.0745,
"step": 83400
},
{
"epoch": 1.78,
"learning_rate": 8.124702217987101e-06,
"loss": 0.0789,
"step": 83500
},
{
"epoch": 1.78,
"learning_rate": 8.110479793490396e-06,
"loss": 0.091,
"step": 83600
},
{
"epoch": 1.79,
"learning_rate": 8.096257368993693e-06,
"loss": 0.0758,
"step": 83700
},
{
"epoch": 1.79,
"learning_rate": 8.08203494449699e-06,
"loss": 0.0815,
"step": 83800
},
{
"epoch": 1.79,
"learning_rate": 8.067812520000286e-06,
"loss": 0.0852,
"step": 83900
},
{
"epoch": 1.79,
"learning_rate": 8.053590095503581e-06,
"loss": 0.0742,
"step": 84000
},
{
"epoch": 1.79,
"learning_rate": 8.039367671006878e-06,
"loss": 0.0806,
"step": 84100
},
{
"epoch": 1.8,
"learning_rate": 8.025145246510173e-06,
"loss": 0.0836,
"step": 84200
},
{
"epoch": 1.8,
"learning_rate": 8.01092282201347e-06,
"loss": 0.0771,
"step": 84300
},
{
"epoch": 1.8,
"learning_rate": 7.996700397516766e-06,
"loss": 0.0745,
"step": 84400
},
{
"epoch": 1.8,
"learning_rate": 7.982477973020063e-06,
"loss": 0.0795,
"step": 84500
},
{
"epoch": 1.8,
"learning_rate": 7.968255548523358e-06,
"loss": 0.0784,
"step": 84600
},
{
"epoch": 1.81,
"learning_rate": 7.954033124026653e-06,
"loss": 0.0762,
"step": 84700
},
{
"epoch": 1.81,
"learning_rate": 7.93981069952995e-06,
"loss": 0.0822,
"step": 84800
},
{
"epoch": 1.81,
"learning_rate": 7.925588275033246e-06,
"loss": 0.0775,
"step": 84900
},
{
"epoch": 1.81,
"learning_rate": 7.911365850536543e-06,
"loss": 0.0784,
"step": 85000
},
{
"epoch": 1.82,
"learning_rate": 7.897143426039838e-06,
"loss": 0.0823,
"step": 85100
},
{
"epoch": 1.82,
"learning_rate": 7.882921001543134e-06,
"loss": 0.077,
"step": 85200
},
{
"epoch": 1.82,
"learning_rate": 7.86869857704643e-06,
"loss": 0.0737,
"step": 85300
},
{
"epoch": 1.82,
"learning_rate": 7.854476152549726e-06,
"loss": 0.0769,
"step": 85400
},
{
"epoch": 1.82,
"learning_rate": 7.840253728053023e-06,
"loss": 0.0783,
"step": 85500
},
{
"epoch": 1.83,
"learning_rate": 7.826031303556318e-06,
"loss": 0.0817,
"step": 85600
},
{
"epoch": 1.83,
"learning_rate": 7.811808879059614e-06,
"loss": 0.0759,
"step": 85700
},
{
"epoch": 1.83,
"learning_rate": 7.79758645456291e-06,
"loss": 0.0772,
"step": 85800
},
{
"epoch": 1.83,
"learning_rate": 7.783364030066206e-06,
"loss": 0.0792,
"step": 85900
},
{
"epoch": 1.83,
"learning_rate": 7.769141605569503e-06,
"loss": 0.0738,
"step": 86000
},
{
"epoch": 1.84,
"learning_rate": 7.7549191810728e-06,
"loss": 0.0694,
"step": 86100
},
{
"epoch": 1.84,
"learning_rate": 7.740696756576094e-06,
"loss": 0.0825,
"step": 86200
},
{
"epoch": 1.84,
"learning_rate": 7.72647433207939e-06,
"loss": 0.0809,
"step": 86300
},
{
"epoch": 1.84,
"learning_rate": 7.712251907582686e-06,
"loss": 0.0751,
"step": 86400
},
{
"epoch": 1.85,
"learning_rate": 7.698029483085982e-06,
"loss": 0.0775,
"step": 86500
},
{
"epoch": 1.85,
"learning_rate": 7.683807058589279e-06,
"loss": 0.0771,
"step": 86600
},
{
"epoch": 1.85,
"learning_rate": 7.669584634092574e-06,
"loss": 0.0777,
"step": 86700
},
{
"epoch": 1.85,
"learning_rate": 7.65536220959587e-06,
"loss": 0.0773,
"step": 86800
},
{
"epoch": 1.85,
"learning_rate": 7.641139785099166e-06,
"loss": 0.0749,
"step": 86900
},
{
"epoch": 1.86,
"learning_rate": 7.626917360602462e-06,
"loss": 0.0773,
"step": 87000
},
{
"epoch": 1.86,
"learning_rate": 7.612694936105759e-06,
"loss": 0.0859,
"step": 87100
},
{
"epoch": 1.86,
"learning_rate": 7.598472511609054e-06,
"loss": 0.0798,
"step": 87200
},
{
"epoch": 1.86,
"learning_rate": 7.584250087112351e-06,
"loss": 0.0755,
"step": 87300
},
{
"epoch": 1.86,
"learning_rate": 7.5700276626156465e-06,
"loss": 0.0808,
"step": 87400
},
{
"epoch": 1.87,
"learning_rate": 7.555805238118943e-06,
"loss": 0.0725,
"step": 87500
},
{
"epoch": 1.87,
"learning_rate": 7.541582813622239e-06,
"loss": 0.0812,
"step": 87600
},
{
"epoch": 1.87,
"learning_rate": 7.527360389125536e-06,
"loss": 0.0794,
"step": 87700
},
{
"epoch": 1.87,
"learning_rate": 7.513137964628831e-06,
"loss": 0.0692,
"step": 87800
},
{
"epoch": 1.88,
"learning_rate": 7.4989155401321265e-06,
"loss": 0.0773,
"step": 87900
},
{
"epoch": 1.88,
"learning_rate": 7.484693115635423e-06,
"loss": 0.0695,
"step": 88000
},
{
"epoch": 1.88,
"learning_rate": 7.470470691138719e-06,
"loss": 0.0853,
"step": 88100
},
{
"epoch": 1.88,
"learning_rate": 7.456248266642016e-06,
"loss": 0.0759,
"step": 88200
},
{
"epoch": 1.88,
"learning_rate": 7.442025842145311e-06,
"loss": 0.0731,
"step": 88300
},
{
"epoch": 1.89,
"learning_rate": 7.427803417648607e-06,
"loss": 0.0776,
"step": 88400
},
{
"epoch": 1.89,
"learning_rate": 7.413580993151903e-06,
"loss": 0.0799,
"step": 88500
},
{
"epoch": 1.89,
"learning_rate": 7.3993585686552e-06,
"loss": 0.083,
"step": 88600
},
{
"epoch": 1.89,
"learning_rate": 7.385136144158496e-06,
"loss": 0.0833,
"step": 88700
},
{
"epoch": 1.89,
"learning_rate": 7.3709137196617906e-06,
"loss": 0.072,
"step": 88800
},
{
"epoch": 1.9,
"learning_rate": 7.356691295165087e-06,
"loss": 0.0755,
"step": 88900
},
{
"epoch": 1.9,
"learning_rate": 7.342468870668383e-06,
"loss": 0.0788,
"step": 89000
},
{
"epoch": 1.9,
"learning_rate": 7.32824644617168e-06,
"loss": 0.0782,
"step": 89100
},
{
"epoch": 1.9,
"learning_rate": 7.3140240216749755e-06,
"loss": 0.0697,
"step": 89200
},
{
"epoch": 1.91,
"learning_rate": 7.299801597178272e-06,
"loss": 0.0764,
"step": 89300
},
{
"epoch": 1.91,
"learning_rate": 7.285579172681567e-06,
"loss": 0.0711,
"step": 89400
},
{
"epoch": 1.91,
"learning_rate": 7.271356748184864e-06,
"loss": 0.0691,
"step": 89500
},
{
"epoch": 1.91,
"learning_rate": 7.25713432368816e-06,
"loss": 0.0708,
"step": 89600
},
{
"epoch": 1.91,
"learning_rate": 7.242911899191456e-06,
"loss": 0.076,
"step": 89700
},
{
"epoch": 1.92,
"learning_rate": 7.228689474694752e-06,
"loss": 0.0721,
"step": 89800
},
{
"epoch": 1.92,
"learning_rate": 7.214467050198047e-06,
"loss": 0.0758,
"step": 89900
},
{
"epoch": 1.92,
"learning_rate": 7.200244625701344e-06,
"loss": 0.0762,
"step": 90000
},
{
"epoch": 1.92,
"eval_loss": 0.1308322250843048,
"eval_runtime": 34.2018,
"eval_samples_per_second": 146.191,
"eval_steps_per_second": 1.17,
"step": 90000
},
{
"epoch": 1.92,
"learning_rate": 7.18602220120464e-06,
"loss": 0.0774,
"step": 90100
},
{
"epoch": 1.92,
"learning_rate": 7.171799776707936e-06,
"loss": 0.0753,
"step": 90200
},
{
"epoch": 1.93,
"learning_rate": 7.157577352211232e-06,
"loss": 0.0731,
"step": 90300
},
{
"epoch": 1.93,
"learning_rate": 7.143354927714528e-06,
"loss": 0.0793,
"step": 90400
},
{
"epoch": 1.93,
"learning_rate": 7.129132503217824e-06,
"loss": 0.0665,
"step": 90500
},
{
"epoch": 1.93,
"learning_rate": 7.11491007872112e-06,
"loss": 0.0732,
"step": 90600
},
{
"epoch": 1.93,
"learning_rate": 7.100687654224416e-06,
"loss": 0.0768,
"step": 90700
},
{
"epoch": 1.94,
"learning_rate": 7.086465229727713e-06,
"loss": 0.0716,
"step": 90800
},
{
"epoch": 1.94,
"learning_rate": 7.072242805231009e-06,
"loss": 0.0715,
"step": 90900
},
{
"epoch": 1.94,
"learning_rate": 7.058020380734304e-06,
"loss": 0.0748,
"step": 91000
},
{
"epoch": 1.94,
"learning_rate": 7.0437979562376e-06,
"loss": 0.0672,
"step": 91100
},
{
"epoch": 1.95,
"learning_rate": 7.029575531740896e-06,
"loss": 0.0775,
"step": 91200
},
{
"epoch": 1.95,
"learning_rate": 7.015353107244193e-06,
"loss": 0.0696,
"step": 91300
},
{
"epoch": 1.95,
"learning_rate": 7.001130682747489e-06,
"loss": 0.0688,
"step": 91400
},
{
"epoch": 1.95,
"learning_rate": 6.9869082582507845e-06,
"loss": 0.067,
"step": 91500
},
{
"epoch": 1.95,
"learning_rate": 6.97268583375408e-06,
"loss": 0.079,
"step": 91600
},
{
"epoch": 1.96,
"learning_rate": 6.958463409257377e-06,
"loss": 0.0729,
"step": 91700
},
{
"epoch": 1.96,
"learning_rate": 6.944240984760673e-06,
"loss": 0.0701,
"step": 91800
},
{
"epoch": 1.96,
"learning_rate": 6.9300185602639695e-06,
"loss": 0.0699,
"step": 91900
},
{
"epoch": 1.96,
"learning_rate": 6.9157961357672645e-06,
"loss": 0.0743,
"step": 92000
},
{
"epoch": 1.96,
"learning_rate": 6.90157371127056e-06,
"loss": 0.0657,
"step": 92100
},
{
"epoch": 1.97,
"learning_rate": 6.887351286773857e-06,
"loss": 0.071,
"step": 92200
},
{
"epoch": 1.97,
"learning_rate": 6.873128862277153e-06,
"loss": 0.0685,
"step": 92300
},
{
"epoch": 1.97,
"learning_rate": 6.8589064377804494e-06,
"loss": 0.079,
"step": 92400
},
{
"epoch": 1.97,
"learning_rate": 6.844684013283745e-06,
"loss": 0.0709,
"step": 92500
},
{
"epoch": 1.98,
"learning_rate": 6.830461588787041e-06,
"loss": 0.0693,
"step": 92600
},
{
"epoch": 1.98,
"learning_rate": 6.816239164290337e-06,
"loss": 0.0703,
"step": 92700
},
{
"epoch": 1.98,
"learning_rate": 6.8020167397936336e-06,
"loss": 0.0698,
"step": 92800
},
{
"epoch": 1.98,
"learning_rate": 6.787794315296929e-06,
"loss": 0.0727,
"step": 92900
},
{
"epoch": 1.98,
"learning_rate": 6.773571890800226e-06,
"loss": 0.0641,
"step": 93000
},
{
"epoch": 1.99,
"learning_rate": 6.759349466303521e-06,
"loss": 0.0765,
"step": 93100
},
{
"epoch": 1.99,
"learning_rate": 6.745127041806817e-06,
"loss": 0.0653,
"step": 93200
},
{
"epoch": 1.99,
"learning_rate": 6.7309046173101135e-06,
"loss": 0.0687,
"step": 93300
},
{
"epoch": 1.99,
"learning_rate": 6.716682192813409e-06,
"loss": 0.0759,
"step": 93400
},
{
"epoch": 1.99,
"learning_rate": 6.702459768316706e-06,
"loss": 0.0688,
"step": 93500
},
{
"epoch": 2.0,
"learning_rate": 6.688237343820001e-06,
"loss": 0.0673,
"step": 93600
},
{
"epoch": 2.0,
"learning_rate": 6.674014919323298e-06,
"loss": 0.0666,
"step": 93700
},
{
"epoch": 2.0,
"learning_rate": 6.6597924948265935e-06,
"loss": 0.0588,
"step": 93800
},
{
"epoch": 2.0,
"learning_rate": 6.64557007032989e-06,
"loss": 0.0303,
"step": 93900
},
{
"epoch": 2.01,
"learning_rate": 6.631347645833186e-06,
"loss": 0.0387,
"step": 94000
},
{
"epoch": 2.01,
"learning_rate": 6.617125221336483e-06,
"loss": 0.0344,
"step": 94100
},
{
"epoch": 2.01,
"learning_rate": 6.602902796839778e-06,
"loss": 0.0354,
"step": 94200
},
{
"epoch": 2.01,
"learning_rate": 6.588680372343073e-06,
"loss": 0.0306,
"step": 94300
},
{
"epoch": 2.01,
"learning_rate": 6.57445794784637e-06,
"loss": 0.0331,
"step": 94400
},
{
"epoch": 2.02,
"learning_rate": 6.560235523349666e-06,
"loss": 0.0325,
"step": 94500
},
{
"epoch": 2.02,
"learning_rate": 6.5460130988529626e-06,
"loss": 0.0319,
"step": 94600
},
{
"epoch": 2.02,
"learning_rate": 6.5317906743562575e-06,
"loss": 0.0337,
"step": 94700
},
{
"epoch": 2.02,
"learning_rate": 6.517568249859554e-06,
"loss": 0.0338,
"step": 94800
},
{
"epoch": 2.02,
"learning_rate": 6.50334582536285e-06,
"loss": 0.0362,
"step": 94900
},
{
"epoch": 2.03,
"learning_rate": 6.489123400866147e-06,
"loss": 0.0363,
"step": 95000
},
{
"epoch": 2.03,
"learning_rate": 6.4749009763694425e-06,
"loss": 0.0354,
"step": 95100
},
{
"epoch": 2.03,
"learning_rate": 6.4606785518727375e-06,
"loss": 0.0275,
"step": 95200
},
{
"epoch": 2.03,
"learning_rate": 6.446456127376034e-06,
"loss": 0.0328,
"step": 95300
},
{
"epoch": 2.04,
"learning_rate": 6.43223370287933e-06,
"loss": 0.0353,
"step": 95400
},
{
"epoch": 2.04,
"learning_rate": 6.418011278382627e-06,
"loss": 0.0342,
"step": 95500
},
{
"epoch": 2.04,
"learning_rate": 6.4037888538859225e-06,
"loss": 0.0341,
"step": 95600
},
{
"epoch": 2.04,
"learning_rate": 6.389566429389219e-06,
"loss": 0.0317,
"step": 95700
},
{
"epoch": 2.04,
"learning_rate": 6.375344004892514e-06,
"loss": 0.033,
"step": 95800
},
{
"epoch": 2.05,
"learning_rate": 6.361121580395811e-06,
"loss": 0.0334,
"step": 95900
},
{
"epoch": 2.05,
"learning_rate": 6.346899155899107e-06,
"loss": 0.0361,
"step": 96000
},
{
"epoch": 2.05,
"learning_rate": 6.332676731402403e-06,
"loss": 0.0319,
"step": 96100
},
{
"epoch": 2.05,
"learning_rate": 6.318454306905699e-06,
"loss": 0.0347,
"step": 96200
},
{
"epoch": 2.05,
"learning_rate": 6.304231882408994e-06,
"loss": 0.0333,
"step": 96300
},
{
"epoch": 2.06,
"learning_rate": 6.290009457912291e-06,
"loss": 0.037,
"step": 96400
},
{
"epoch": 2.06,
"learning_rate": 6.2757870334155865e-06,
"loss": 0.0343,
"step": 96500
},
{
"epoch": 2.06,
"learning_rate": 6.261564608918883e-06,
"loss": 0.0334,
"step": 96600
},
{
"epoch": 2.06,
"learning_rate": 6.247342184422179e-06,
"loss": 0.0312,
"step": 96700
},
{
"epoch": 2.07,
"learning_rate": 6.233119759925475e-06,
"loss": 0.0315,
"step": 96800
},
{
"epoch": 2.07,
"learning_rate": 6.218897335428771e-06,
"loss": 0.0268,
"step": 96900
},
{
"epoch": 2.07,
"learning_rate": 6.204674910932067e-06,
"loss": 0.0263,
"step": 97000
},
{
"epoch": 2.07,
"learning_rate": 6.190452486435363e-06,
"loss": 0.0303,
"step": 97100
},
{
"epoch": 2.07,
"learning_rate": 6.17623006193866e-06,
"loss": 0.0345,
"step": 97200
},
{
"epoch": 2.08,
"learning_rate": 6.162007637441956e-06,
"loss": 0.0358,
"step": 97300
},
{
"epoch": 2.08,
"learning_rate": 6.147785212945251e-06,
"loss": 0.0369,
"step": 97400
},
{
"epoch": 2.08,
"learning_rate": 6.133562788448547e-06,
"loss": 0.0392,
"step": 97500
},
{
"epoch": 2.08,
"learning_rate": 6.119340363951843e-06,
"loss": 0.0319,
"step": 97600
},
{
"epoch": 2.08,
"learning_rate": 6.10511793945514e-06,
"loss": 0.0372,
"step": 97700
},
{
"epoch": 2.09,
"learning_rate": 6.090895514958436e-06,
"loss": 0.0317,
"step": 97800
},
{
"epoch": 2.09,
"learning_rate": 6.076673090461731e-06,
"loss": 0.0321,
"step": 97900
},
{
"epoch": 2.09,
"learning_rate": 6.062450665965027e-06,
"loss": 0.0369,
"step": 98000
},
{
"epoch": 2.09,
"learning_rate": 6.048228241468324e-06,
"loss": 0.0332,
"step": 98100
},
{
"epoch": 2.09,
"learning_rate": 6.03400581697162e-06,
"loss": 0.0306,
"step": 98200
},
{
"epoch": 2.1,
"learning_rate": 6.019783392474916e-06,
"loss": 0.0337,
"step": 98300
},
{
"epoch": 2.1,
"learning_rate": 6.005560967978211e-06,
"loss": 0.0332,
"step": 98400
},
{
"epoch": 2.1,
"learning_rate": 5.991338543481507e-06,
"loss": 0.0337,
"step": 98500
},
{
"epoch": 2.1,
"learning_rate": 5.977116118984804e-06,
"loss": 0.0296,
"step": 98600
},
{
"epoch": 2.11,
"learning_rate": 5.9628936944881e-06,
"loss": 0.0341,
"step": 98700
},
{
"epoch": 2.11,
"learning_rate": 5.948671269991396e-06,
"loss": 0.0287,
"step": 98800
},
{
"epoch": 2.11,
"learning_rate": 5.934448845494692e-06,
"loss": 0.0313,
"step": 98900
},
{
"epoch": 2.11,
"learning_rate": 5.920226420997988e-06,
"loss": 0.0335,
"step": 99000
},
{
"epoch": 2.11,
"learning_rate": 5.906003996501284e-06,
"loss": 0.0327,
"step": 99100
},
{
"epoch": 2.12,
"learning_rate": 5.8917815720045805e-06,
"loss": 0.0363,
"step": 99200
},
{
"epoch": 2.12,
"learning_rate": 5.877559147507876e-06,
"loss": 0.0339,
"step": 99300
},
{
"epoch": 2.12,
"learning_rate": 5.863336723011173e-06,
"loss": 0.0282,
"step": 99400
},
{
"epoch": 2.12,
"learning_rate": 5.849114298514468e-06,
"loss": 0.0339,
"step": 99500
},
{
"epoch": 2.12,
"learning_rate": 5.834891874017764e-06,
"loss": 0.0297,
"step": 99600
},
{
"epoch": 2.13,
"learning_rate": 5.82066944952106e-06,
"loss": 0.0287,
"step": 99700
},
{
"epoch": 2.13,
"learning_rate": 5.806447025024356e-06,
"loss": 0.0321,
"step": 99800
},
{
"epoch": 2.13,
"learning_rate": 5.792224600527653e-06,
"loss": 0.0304,
"step": 99900
},
{
"epoch": 2.13,
"learning_rate": 5.778002176030948e-06,
"loss": 0.0351,
"step": 100000
},
{
"epoch": 2.13,
"eval_loss": 0.13596704602241516,
"eval_runtime": 34.4302,
"eval_samples_per_second": 145.221,
"eval_steps_per_second": 1.162,
"step": 100000
},
{
"epoch": 2.14,
"learning_rate": 5.7637797515342445e-06,
"loss": 0.0314,
"step": 100100
},
{
"epoch": 2.14,
"learning_rate": 5.74955732703754e-06,
"loss": 0.033,
"step": 100200
},
{
"epoch": 2.14,
"learning_rate": 5.735334902540837e-06,
"loss": 0.0358,
"step": 100300
},
{
"epoch": 2.14,
"learning_rate": 5.721112478044133e-06,
"loss": 0.0345,
"step": 100400
},
{
"epoch": 2.14,
"learning_rate": 5.7068900535474295e-06,
"loss": 0.0353,
"step": 100500
},
{
"epoch": 2.15,
"learning_rate": 5.6926676290507245e-06,
"loss": 0.0347,
"step": 100600
},
{
"epoch": 2.15,
"learning_rate": 5.67844520455402e-06,
"loss": 0.0301,
"step": 100700
},
{
"epoch": 2.15,
"learning_rate": 5.664222780057317e-06,
"loss": 0.0337,
"step": 100800
},
{
"epoch": 2.15,
"learning_rate": 5.650000355560613e-06,
"loss": 0.0317,
"step": 100900
},
{
"epoch": 2.15,
"learning_rate": 5.6357779310639095e-06,
"loss": 0.0309,
"step": 101000
},
{
"epoch": 2.16,
"learning_rate": 5.6215555065672044e-06,
"loss": 0.0307,
"step": 101100
},
{
"epoch": 2.16,
"learning_rate": 5.607333082070501e-06,
"loss": 0.0321,
"step": 101200
},
{
"epoch": 2.16,
"learning_rate": 5.593110657573797e-06,
"loss": 0.0326,
"step": 101300
},
{
"epoch": 2.16,
"learning_rate": 5.578888233077094e-06,
"loss": 0.0286,
"step": 101400
},
{
"epoch": 2.17,
"learning_rate": 5.564665808580389e-06,
"loss": 0.0345,
"step": 101500
},
{
"epoch": 2.17,
"learning_rate": 5.550443384083684e-06,
"loss": 0.0285,
"step": 101600
},
{
"epoch": 2.17,
"learning_rate": 5.536220959586981e-06,
"loss": 0.0332,
"step": 101700
},
{
"epoch": 2.17,
"learning_rate": 5.521998535090277e-06,
"loss": 0.0293,
"step": 101800
},
{
"epoch": 2.17,
"learning_rate": 5.5077761105935736e-06,
"loss": 0.0347,
"step": 101900
},
{
"epoch": 2.18,
"learning_rate": 5.493553686096869e-06,
"loss": 0.0353,
"step": 102000
},
{
"epoch": 2.18,
"learning_rate": 5.479331261600166e-06,
"loss": 0.0297,
"step": 102100
},
{
"epoch": 2.18,
"learning_rate": 5.465108837103461e-06,
"loss": 0.0377,
"step": 102200
},
{
"epoch": 2.18,
"learning_rate": 5.450886412606758e-06,
"loss": 0.0387,
"step": 102300
},
{
"epoch": 2.18,
"learning_rate": 5.4366639881100535e-06,
"loss": 0.0255,
"step": 102400
},
{
"epoch": 2.19,
"learning_rate": 5.42244156361335e-06,
"loss": 0.027,
"step": 102500
},
{
"epoch": 2.19,
"learning_rate": 5.408219139116646e-06,
"loss": 0.0325,
"step": 102600
},
{
"epoch": 2.19,
"learning_rate": 5.393996714619941e-06,
"loss": 0.0302,
"step": 102700
},
{
"epoch": 2.19,
"learning_rate": 5.379774290123238e-06,
"loss": 0.0358,
"step": 102800
},
{
"epoch": 2.2,
"learning_rate": 5.3655518656265335e-06,
"loss": 0.031,
"step": 102900
},
{
"epoch": 2.2,
"learning_rate": 5.35132944112983e-06,
"loss": 0.0322,
"step": 103000
},
{
"epoch": 2.2,
"learning_rate": 5.337107016633126e-06,
"loss": 0.0321,
"step": 103100
},
{
"epoch": 2.2,
"learning_rate": 5.322884592136422e-06,
"loss": 0.0327,
"step": 103200
},
{
"epoch": 2.2,
"learning_rate": 5.308662167639718e-06,
"loss": 0.0348,
"step": 103300
},
{
"epoch": 2.21,
"learning_rate": 5.294439743143014e-06,
"loss": 0.0339,
"step": 103400
},
{
"epoch": 2.21,
"learning_rate": 5.28021731864631e-06,
"loss": 0.0319,
"step": 103500
},
{
"epoch": 2.21,
"learning_rate": 5.265994894149607e-06,
"loss": 0.0315,
"step": 103600
},
{
"epoch": 2.21,
"learning_rate": 5.2517724696529026e-06,
"loss": 0.0319,
"step": 103700
},
{
"epoch": 2.21,
"learning_rate": 5.2375500451561975e-06,
"loss": 0.0305,
"step": 103800
},
{
"epoch": 2.22,
"learning_rate": 5.223327620659494e-06,
"loss": 0.0294,
"step": 103900
},
{
"epoch": 2.22,
"learning_rate": 5.20910519616279e-06,
"loss": 0.033,
"step": 104000
},
{
"epoch": 2.22,
"learning_rate": 5.194882771666087e-06,
"loss": 0.0303,
"step": 104100
},
{
"epoch": 2.22,
"learning_rate": 5.1806603471693825e-06,
"loss": 0.0353,
"step": 104200
},
{
"epoch": 2.23,
"learning_rate": 5.166437922672678e-06,
"loss": 0.0307,
"step": 104300
},
{
"epoch": 2.23,
"learning_rate": 5.152215498175974e-06,
"loss": 0.0321,
"step": 104400
},
{
"epoch": 2.23,
"learning_rate": 5.137993073679271e-06,
"loss": 0.0328,
"step": 104500
},
{
"epoch": 2.23,
"learning_rate": 5.123770649182567e-06,
"loss": 0.0357,
"step": 104600
},
{
"epoch": 2.23,
"learning_rate": 5.109548224685863e-06,
"loss": 0.0302,
"step": 104700
},
{
"epoch": 2.24,
"learning_rate": 5.095325800189158e-06,
"loss": 0.0349,
"step": 104800
},
{
"epoch": 2.24,
"learning_rate": 5.081103375692454e-06,
"loss": 0.0295,
"step": 104900
},
{
"epoch": 2.24,
"learning_rate": 5.066880951195751e-06,
"loss": 0.0276,
"step": 105000
},
{
"epoch": 2.24,
"learning_rate": 5.052658526699047e-06,
"loss": 0.0328,
"step": 105100
},
{
"epoch": 2.24,
"learning_rate": 5.038436102202343e-06,
"loss": 0.0372,
"step": 105200
},
{
"epoch": 2.25,
"learning_rate": 5.024213677705639e-06,
"loss": 0.035,
"step": 105300
},
{
"epoch": 2.25,
"learning_rate": 5.009991253208935e-06,
"loss": 0.0294,
"step": 105400
},
{
"epoch": 2.25,
"learning_rate": 4.995768828712231e-06,
"loss": 0.0345,
"step": 105500
},
{
"epoch": 2.25,
"learning_rate": 4.981546404215527e-06,
"loss": 0.0373,
"step": 105600
},
{
"epoch": 2.25,
"learning_rate": 4.967323979718823e-06,
"loss": 0.0305,
"step": 105700
},
{
"epoch": 2.26,
"learning_rate": 4.953101555222119e-06,
"loss": 0.0323,
"step": 105800
},
{
"epoch": 2.26,
"learning_rate": 4.938879130725415e-06,
"loss": 0.0275,
"step": 105900
},
{
"epoch": 2.26,
"learning_rate": 4.924656706228711e-06,
"loss": 0.0323,
"step": 106000
},
{
"epoch": 2.26,
"learning_rate": 4.910434281732007e-06,
"loss": 0.0342,
"step": 106100
},
{
"epoch": 2.27,
"learning_rate": 4.896211857235303e-06,
"loss": 0.0315,
"step": 106200
},
{
"epoch": 2.27,
"learning_rate": 4.881989432738599e-06,
"loss": 0.0286,
"step": 106300
},
{
"epoch": 2.27,
"learning_rate": 4.867767008241896e-06,
"loss": 0.0296,
"step": 106400
},
{
"epoch": 2.27,
"learning_rate": 4.8535445837451915e-06,
"loss": 0.0416,
"step": 106500
},
{
"epoch": 2.27,
"learning_rate": 4.839322159248487e-06,
"loss": 0.0329,
"step": 106600
},
{
"epoch": 2.28,
"learning_rate": 4.825099734751784e-06,
"loss": 0.0301,
"step": 106700
},
{
"epoch": 2.28,
"learning_rate": 4.810877310255079e-06,
"loss": 0.0327,
"step": 106800
},
{
"epoch": 2.28,
"learning_rate": 4.796654885758376e-06,
"loss": 0.032,
"step": 106900
},
{
"epoch": 2.28,
"learning_rate": 4.782432461261671e-06,
"loss": 0.0292,
"step": 107000
},
{
"epoch": 2.28,
"learning_rate": 4.768210036764967e-06,
"loss": 0.0327,
"step": 107100
},
{
"epoch": 2.29,
"learning_rate": 4.753987612268264e-06,
"loss": 0.0288,
"step": 107200
},
{
"epoch": 2.29,
"learning_rate": 4.73976518777156e-06,
"loss": 0.0323,
"step": 107300
},
{
"epoch": 2.29,
"learning_rate": 4.7255427632748555e-06,
"loss": 0.0328,
"step": 107400
},
{
"epoch": 2.29,
"learning_rate": 4.711320338778152e-06,
"loss": 0.032,
"step": 107500
},
{
"epoch": 2.3,
"learning_rate": 4.697097914281448e-06,
"loss": 0.0321,
"step": 107600
},
{
"epoch": 2.3,
"learning_rate": 4.682875489784744e-06,
"loss": 0.0355,
"step": 107700
},
{
"epoch": 2.3,
"learning_rate": 4.6686530652880405e-06,
"loss": 0.0314,
"step": 107800
},
{
"epoch": 2.3,
"learning_rate": 4.6544306407913355e-06,
"loss": 0.0329,
"step": 107900
},
{
"epoch": 2.3,
"learning_rate": 4.640208216294632e-06,
"loss": 0.0322,
"step": 108000
},
{
"epoch": 2.31,
"learning_rate": 4.625985791797928e-06,
"loss": 0.0306,
"step": 108100
},
{
"epoch": 2.31,
"learning_rate": 4.611763367301224e-06,
"loss": 0.0362,
"step": 108200
},
{
"epoch": 2.31,
"learning_rate": 4.5975409428045205e-06,
"loss": 0.0304,
"step": 108300
},
{
"epoch": 2.31,
"learning_rate": 4.583318518307816e-06,
"loss": 0.028,
"step": 108400
},
{
"epoch": 2.31,
"learning_rate": 4.569096093811112e-06,
"loss": 0.0309,
"step": 108500
},
{
"epoch": 2.32,
"learning_rate": 4.554873669314409e-06,
"loss": 0.0294,
"step": 108600
},
{
"epoch": 2.32,
"learning_rate": 4.540651244817705e-06,
"loss": 0.0337,
"step": 108700
},
{
"epoch": 2.32,
"learning_rate": 4.526428820321e-06,
"loss": 0.0268,
"step": 108800
},
{
"epoch": 2.32,
"learning_rate": 4.512206395824297e-06,
"loss": 0.0291,
"step": 108900
},
{
"epoch": 2.33,
"learning_rate": 4.497983971327592e-06,
"loss": 0.0335,
"step": 109000
},
{
"epoch": 2.33,
"learning_rate": 4.483761546830889e-06,
"loss": 0.0321,
"step": 109100
},
{
"epoch": 2.33,
"learning_rate": 4.4695391223341845e-06,
"loss": 0.0277,
"step": 109200
},
{
"epoch": 2.33,
"learning_rate": 4.45531669783748e-06,
"loss": 0.0316,
"step": 109300
},
{
"epoch": 2.33,
"learning_rate": 4.441094273340777e-06,
"loss": 0.033,
"step": 109400
},
{
"epoch": 2.34,
"learning_rate": 4.426871848844073e-06,
"loss": 0.0337,
"step": 109500
},
{
"epoch": 2.34,
"learning_rate": 4.412649424347369e-06,
"loss": 0.0315,
"step": 109600
},
{
"epoch": 2.34,
"learning_rate": 4.398426999850665e-06,
"loss": 0.0301,
"step": 109700
},
{
"epoch": 2.34,
"learning_rate": 4.384204575353961e-06,
"loss": 0.0265,
"step": 109800
},
{
"epoch": 2.34,
"learning_rate": 4.369982150857257e-06,
"loss": 0.0282,
"step": 109900
},
{
"epoch": 2.35,
"learning_rate": 4.355759726360553e-06,
"loss": 0.0347,
"step": 110000
},
{
"epoch": 2.35,
"eval_loss": 0.13324007391929626,
"eval_runtime": 34.3057,
"eval_samples_per_second": 145.748,
"eval_steps_per_second": 1.166,
"step": 110000
},
{
"epoch": 2.35,
"learning_rate": 4.341537301863849e-06,
"loss": 0.032,
"step": 110100
},
{
"epoch": 2.35,
"learning_rate": 4.327314877367145e-06,
"loss": 0.0281,
"step": 110200
},
{
"epoch": 2.35,
"learning_rate": 4.313092452870441e-06,
"loss": 0.0327,
"step": 110300
},
{
"epoch": 2.36,
"learning_rate": 4.298870028373737e-06,
"loss": 0.0325,
"step": 110400
},
{
"epoch": 2.36,
"learning_rate": 4.284647603877034e-06,
"loss": 0.0283,
"step": 110500
},
{
"epoch": 2.36,
"learning_rate": 4.270425179380329e-06,
"loss": 0.0346,
"step": 110600
},
{
"epoch": 2.36,
"learning_rate": 4.256202754883625e-06,
"loss": 0.036,
"step": 110700
},
{
"epoch": 2.36,
"learning_rate": 4.241980330386921e-06,
"loss": 0.0275,
"step": 110800
},
{
"epoch": 2.37,
"learning_rate": 4.227757905890218e-06,
"loss": 0.0329,
"step": 110900
},
{
"epoch": 2.37,
"learning_rate": 4.2135354813935135e-06,
"loss": 0.0332,
"step": 111000
},
{
"epoch": 2.37,
"learning_rate": 4.199313056896809e-06,
"loss": 0.0315,
"step": 111100
},
{
"epoch": 2.37,
"learning_rate": 4.185090632400105e-06,
"loss": 0.0302,
"step": 111200
},
{
"epoch": 2.37,
"learning_rate": 4.170868207903402e-06,
"loss": 0.0308,
"step": 111300
},
{
"epoch": 2.38,
"learning_rate": 4.156645783406698e-06,
"loss": 0.0311,
"step": 111400
},
{
"epoch": 2.38,
"learning_rate": 4.1424233589099935e-06,
"loss": 0.0295,
"step": 111500
},
{
"epoch": 2.38,
"learning_rate": 4.128200934413289e-06,
"loss": 0.037,
"step": 111600
},
{
"epoch": 2.38,
"learning_rate": 4.113978509916586e-06,
"loss": 0.0348,
"step": 111700
},
{
"epoch": 2.39,
"learning_rate": 4.099756085419882e-06,
"loss": 0.0331,
"step": 111800
},
{
"epoch": 2.39,
"learning_rate": 4.085533660923178e-06,
"loss": 0.0295,
"step": 111900
},
{
"epoch": 2.39,
"learning_rate": 4.071311236426474e-06,
"loss": 0.0311,
"step": 112000
},
{
"epoch": 2.39,
"learning_rate": 4.05708881192977e-06,
"loss": 0.0316,
"step": 112100
},
{
"epoch": 2.39,
"learning_rate": 4.042866387433066e-06,
"loss": 0.0292,
"step": 112200
},
{
"epoch": 2.4,
"learning_rate": 4.028643962936362e-06,
"loss": 0.0261,
"step": 112300
},
{
"epoch": 2.4,
"learning_rate": 4.014421538439658e-06,
"loss": 0.03,
"step": 112400
},
{
"epoch": 2.4,
"learning_rate": 4.000199113942954e-06,
"loss": 0.0324,
"step": 112500
},
{
"epoch": 2.4,
"learning_rate": 3.98597668944625e-06,
"loss": 0.0288,
"step": 112600
},
{
"epoch": 2.4,
"learning_rate": 3.971754264949546e-06,
"loss": 0.0274,
"step": 112700
},
{
"epoch": 2.41,
"learning_rate": 3.9575318404528426e-06,
"loss": 0.0288,
"step": 112800
},
{
"epoch": 2.41,
"learning_rate": 3.943309415956138e-06,
"loss": 0.0308,
"step": 112900
},
{
"epoch": 2.41,
"learning_rate": 3.929086991459434e-06,
"loss": 0.0284,
"step": 113000
},
{
"epoch": 2.41,
"learning_rate": 3.914864566962731e-06,
"loss": 0.0245,
"step": 113100
},
{
"epoch": 2.41,
"learning_rate": 3.900642142466026e-06,
"loss": 0.026,
"step": 113200
},
{
"epoch": 2.42,
"learning_rate": 3.8864197179693225e-06,
"loss": 0.0326,
"step": 113300
},
{
"epoch": 2.42,
"learning_rate": 3.872197293472618e-06,
"loss": 0.0321,
"step": 113400
},
{
"epoch": 2.42,
"learning_rate": 3.857974868975914e-06,
"loss": 0.0351,
"step": 113500
},
{
"epoch": 2.42,
"learning_rate": 3.843752444479211e-06,
"loss": 0.0362,
"step": 113600
},
{
"epoch": 2.43,
"learning_rate": 3.829530019982507e-06,
"loss": 0.0338,
"step": 113700
},
{
"epoch": 2.43,
"learning_rate": 3.8153075954858025e-06,
"loss": 0.032,
"step": 113800
},
{
"epoch": 2.43,
"learning_rate": 3.8010851709890987e-06,
"loss": 0.031,
"step": 113900
},
{
"epoch": 2.43,
"learning_rate": 3.7868627464923945e-06,
"loss": 0.0326,
"step": 114000
},
{
"epoch": 2.43,
"learning_rate": 3.7726403219956908e-06,
"loss": 0.0355,
"step": 114100
},
{
"epoch": 2.44,
"learning_rate": 3.758417897498987e-06,
"loss": 0.0285,
"step": 114200
},
{
"epoch": 2.44,
"learning_rate": 3.744195473002283e-06,
"loss": 0.0346,
"step": 114300
},
{
"epoch": 2.44,
"learning_rate": 3.729973048505579e-06,
"loss": 0.0369,
"step": 114400
},
{
"epoch": 2.44,
"learning_rate": 3.7157506240088753e-06,
"loss": 0.0324,
"step": 114500
},
{
"epoch": 2.44,
"learning_rate": 3.701528199512171e-06,
"loss": 0.0317,
"step": 114600
},
{
"epoch": 2.45,
"learning_rate": 3.6873057750154674e-06,
"loss": 0.0289,
"step": 114700
},
{
"epoch": 2.45,
"learning_rate": 3.6730833505187628e-06,
"loss": 0.0312,
"step": 114800
},
{
"epoch": 2.45,
"learning_rate": 3.658860926022059e-06,
"loss": 0.0322,
"step": 114900
},
{
"epoch": 2.45,
"learning_rate": 3.6446385015253557e-06,
"loss": 0.0266,
"step": 115000
},
{
"epoch": 2.46,
"learning_rate": 3.630416077028651e-06,
"loss": 0.0289,
"step": 115100
},
{
"epoch": 2.46,
"learning_rate": 3.6161936525319473e-06,
"loss": 0.036,
"step": 115200
},
{
"epoch": 2.46,
"learning_rate": 3.6019712280352436e-06,
"loss": 0.028,
"step": 115300
},
{
"epoch": 2.46,
"learning_rate": 3.5877488035385394e-06,
"loss": 0.029,
"step": 115400
},
{
"epoch": 2.46,
"learning_rate": 3.5735263790418356e-06,
"loss": 0.0314,
"step": 115500
},
{
"epoch": 2.47,
"learning_rate": 3.5593039545451315e-06,
"loss": 0.0227,
"step": 115600
},
{
"epoch": 2.47,
"learning_rate": 3.5450815300484277e-06,
"loss": 0.0317,
"step": 115700
},
{
"epoch": 2.47,
"learning_rate": 3.530859105551724e-06,
"loss": 0.0324,
"step": 115800
},
{
"epoch": 2.47,
"learning_rate": 3.5166366810550193e-06,
"loss": 0.0309,
"step": 115900
},
{
"epoch": 2.47,
"learning_rate": 3.5024142565583156e-06,
"loss": 0.0287,
"step": 116000
},
{
"epoch": 2.48,
"learning_rate": 3.4881918320616123e-06,
"loss": 0.027,
"step": 116100
},
{
"epoch": 2.48,
"learning_rate": 3.4739694075649077e-06,
"loss": 0.0331,
"step": 116200
},
{
"epoch": 2.48,
"learning_rate": 3.459746983068204e-06,
"loss": 0.0307,
"step": 116300
},
{
"epoch": 2.48,
"learning_rate": 3.4455245585714997e-06,
"loss": 0.0286,
"step": 116400
},
{
"epoch": 2.49,
"learning_rate": 3.431302134074796e-06,
"loss": 0.0332,
"step": 116500
},
{
"epoch": 2.49,
"learning_rate": 3.417079709578092e-06,
"loss": 0.0281,
"step": 116600
},
{
"epoch": 2.49,
"learning_rate": 3.402857285081388e-06,
"loss": 0.0323,
"step": 116700
},
{
"epoch": 2.49,
"learning_rate": 3.3886348605846843e-06,
"loss": 0.0303,
"step": 116800
},
{
"epoch": 2.49,
"learning_rate": 3.3744124360879805e-06,
"loss": 0.0336,
"step": 116900
},
{
"epoch": 2.5,
"learning_rate": 3.360190011591276e-06,
"loss": 0.0333,
"step": 117000
},
{
"epoch": 2.5,
"learning_rate": 3.3459675870945726e-06,
"loss": 0.0283,
"step": 117100
},
{
"epoch": 2.5,
"learning_rate": 3.331745162597868e-06,
"loss": 0.0312,
"step": 117200
},
{
"epoch": 2.5,
"learning_rate": 3.3175227381011642e-06,
"loss": 0.0319,
"step": 117300
},
{
"epoch": 2.5,
"learning_rate": 3.3033003136044605e-06,
"loss": 0.0281,
"step": 117400
},
{
"epoch": 2.51,
"learning_rate": 3.2890778891077563e-06,
"loss": 0.0331,
"step": 117500
},
{
"epoch": 2.51,
"learning_rate": 3.2748554646110525e-06,
"loss": 0.0348,
"step": 117600
},
{
"epoch": 2.51,
"learning_rate": 3.2606330401143488e-06,
"loss": 0.0245,
"step": 117700
},
{
"epoch": 2.51,
"learning_rate": 3.2464106156176446e-06,
"loss": 0.0279,
"step": 117800
},
{
"epoch": 2.52,
"learning_rate": 3.232188191120941e-06,
"loss": 0.0285,
"step": 117900
},
{
"epoch": 2.52,
"learning_rate": 3.2179657666242362e-06,
"loss": 0.0288,
"step": 118000
},
{
"epoch": 2.52,
"learning_rate": 3.2037433421275325e-06,
"loss": 0.0293,
"step": 118100
},
{
"epoch": 2.52,
"learning_rate": 3.189520917630829e-06,
"loss": 0.0296,
"step": 118200
},
{
"epoch": 2.52,
"learning_rate": 3.1752984931341245e-06,
"loss": 0.0303,
"step": 118300
},
{
"epoch": 2.53,
"learning_rate": 3.1610760686374208e-06,
"loss": 0.0309,
"step": 118400
},
{
"epoch": 2.53,
"learning_rate": 3.146853644140717e-06,
"loss": 0.0348,
"step": 118500
},
{
"epoch": 2.53,
"learning_rate": 3.132631219644013e-06,
"loss": 0.0299,
"step": 118600
},
{
"epoch": 2.53,
"learning_rate": 3.118408795147309e-06,
"loss": 0.0318,
"step": 118700
},
{
"epoch": 2.53,
"learning_rate": 3.104186370650605e-06,
"loss": 0.0266,
"step": 118800
},
{
"epoch": 2.54,
"learning_rate": 3.089963946153901e-06,
"loss": 0.0294,
"step": 118900
},
{
"epoch": 2.54,
"learning_rate": 3.0757415216571974e-06,
"loss": 0.0281,
"step": 119000
},
{
"epoch": 2.54,
"learning_rate": 3.061519097160493e-06,
"loss": 0.0251,
"step": 119100
},
{
"epoch": 2.54,
"learning_rate": 3.0472966726637895e-06,
"loss": 0.0288,
"step": 119200
},
{
"epoch": 2.55,
"learning_rate": 3.0330742481670857e-06,
"loss": 0.0313,
"step": 119300
},
{
"epoch": 2.55,
"learning_rate": 3.018851823670381e-06,
"loss": 0.0295,
"step": 119400
},
{
"epoch": 2.55,
"learning_rate": 3.0046293991736774e-06,
"loss": 0.0334,
"step": 119500
},
{
"epoch": 2.55,
"learning_rate": 2.990406974676973e-06,
"loss": 0.0243,
"step": 119600
},
{
"epoch": 2.55,
"learning_rate": 2.9761845501802694e-06,
"loss": 0.0335,
"step": 119700
},
{
"epoch": 2.56,
"learning_rate": 2.9619621256835657e-06,
"loss": 0.0327,
"step": 119800
},
{
"epoch": 2.56,
"learning_rate": 2.9477397011868615e-06,
"loss": 0.0304,
"step": 119900
},
{
"epoch": 2.56,
"learning_rate": 2.9335172766901577e-06,
"loss": 0.0302,
"step": 120000
},
{
"epoch": 2.56,
"eval_loss": 0.13005784153938293,
"eval_runtime": 34.1867,
"eval_samples_per_second": 146.256,
"eval_steps_per_second": 1.17,
"step": 120000
},
{
"epoch": 2.56,
"learning_rate": 2.919294852193454e-06,
"loss": 0.0274,
"step": 120100
},
{
"epoch": 2.56,
"learning_rate": 2.9050724276967494e-06,
"loss": 0.0289,
"step": 120200
},
{
"epoch": 2.57,
"learning_rate": 2.890850003200046e-06,
"loss": 0.0267,
"step": 120300
},
{
"epoch": 2.57,
"learning_rate": 2.8766275787033414e-06,
"loss": 0.0307,
"step": 120400
},
{
"epoch": 2.57,
"learning_rate": 2.8624051542066377e-06,
"loss": 0.0268,
"step": 120500
},
{
"epoch": 2.57,
"learning_rate": 2.848182729709934e-06,
"loss": 0.0314,
"step": 120600
},
{
"epoch": 2.57,
"learning_rate": 2.8339603052132297e-06,
"loss": 0.0306,
"step": 120700
},
{
"epoch": 2.58,
"learning_rate": 2.819737880716526e-06,
"loss": 0.03,
"step": 120800
},
{
"epoch": 2.58,
"learning_rate": 2.8055154562198222e-06,
"loss": 0.0276,
"step": 120900
},
{
"epoch": 2.58,
"learning_rate": 2.791293031723118e-06,
"loss": 0.0315,
"step": 121000
},
{
"epoch": 2.58,
"learning_rate": 2.7770706072264143e-06,
"loss": 0.0345,
"step": 121100
},
{
"epoch": 2.59,
"learning_rate": 2.7628481827297097e-06,
"loss": 0.03,
"step": 121200
},
{
"epoch": 2.59,
"learning_rate": 2.7486257582330064e-06,
"loss": 0.0265,
"step": 121300
},
{
"epoch": 2.59,
"learning_rate": 2.7344033337363026e-06,
"loss": 0.0317,
"step": 121400
},
{
"epoch": 2.59,
"learning_rate": 2.720180909239598e-06,
"loss": 0.0362,
"step": 121500
},
{
"epoch": 2.59,
"learning_rate": 2.7059584847428942e-06,
"loss": 0.0293,
"step": 121600
},
{
"epoch": 2.6,
"learning_rate": 2.6917360602461905e-06,
"loss": 0.0262,
"step": 121700
},
{
"epoch": 2.6,
"learning_rate": 2.6775136357494863e-06,
"loss": 0.0306,
"step": 121800
},
{
"epoch": 2.6,
"learning_rate": 2.6632912112527826e-06,
"loss": 0.0288,
"step": 121900
},
{
"epoch": 2.6,
"learning_rate": 2.6490687867560784e-06,
"loss": 0.0309,
"step": 122000
},
{
"epoch": 2.6,
"learning_rate": 2.6348463622593746e-06,
"loss": 0.0259,
"step": 122100
},
{
"epoch": 2.61,
"learning_rate": 2.620623937762671e-06,
"loss": 0.0288,
"step": 122200
},
{
"epoch": 2.61,
"learning_rate": 2.6064015132659663e-06,
"loss": 0.0359,
"step": 122300
},
{
"epoch": 2.61,
"learning_rate": 2.592179088769263e-06,
"loss": 0.0277,
"step": 122400
},
{
"epoch": 2.61,
"learning_rate": 2.577956664272559e-06,
"loss": 0.0314,
"step": 122500
},
{
"epoch": 2.62,
"learning_rate": 2.5637342397758546e-06,
"loss": 0.0277,
"step": 122600
},
{
"epoch": 2.62,
"learning_rate": 2.549511815279151e-06,
"loss": 0.0348,
"step": 122700
},
{
"epoch": 2.62,
"learning_rate": 2.5352893907824466e-06,
"loss": 0.0295,
"step": 122800
},
{
"epoch": 2.62,
"learning_rate": 2.521066966285743e-06,
"loss": 0.0325,
"step": 122900
},
{
"epoch": 2.62,
"learning_rate": 2.506844541789039e-06,
"loss": 0.032,
"step": 123000
},
{
"epoch": 2.63,
"learning_rate": 2.492622117292335e-06,
"loss": 0.028,
"step": 123100
},
{
"epoch": 2.63,
"learning_rate": 2.478399692795631e-06,
"loss": 0.0292,
"step": 123200
},
{
"epoch": 2.63,
"learning_rate": 2.464177268298927e-06,
"loss": 0.0348,
"step": 123300
},
{
"epoch": 2.63,
"learning_rate": 2.4499548438022232e-06,
"loss": 0.0271,
"step": 123400
},
{
"epoch": 2.63,
"learning_rate": 2.4357324193055195e-06,
"loss": 0.0285,
"step": 123500
},
{
"epoch": 2.64,
"learning_rate": 2.4215099948088153e-06,
"loss": 0.0278,
"step": 123600
},
{
"epoch": 2.64,
"learning_rate": 2.407287570312111e-06,
"loss": 0.0298,
"step": 123700
},
{
"epoch": 2.64,
"learning_rate": 2.3930651458154074e-06,
"loss": 0.0298,
"step": 123800
},
{
"epoch": 2.64,
"learning_rate": 2.3788427213187036e-06,
"loss": 0.0309,
"step": 123900
},
{
"epoch": 2.65,
"learning_rate": 2.3646202968219994e-06,
"loss": 0.0299,
"step": 124000
},
{
"epoch": 2.65,
"learning_rate": 2.3503978723252953e-06,
"loss": 0.0266,
"step": 124100
},
{
"epoch": 2.65,
"learning_rate": 2.3361754478285915e-06,
"loss": 0.0249,
"step": 124200
},
{
"epoch": 2.65,
"learning_rate": 2.3219530233318877e-06,
"loss": 0.0286,
"step": 124300
},
{
"epoch": 2.65,
"learning_rate": 2.3077305988351836e-06,
"loss": 0.0262,
"step": 124400
},
{
"epoch": 2.66,
"learning_rate": 2.29350817433848e-06,
"loss": 0.0276,
"step": 124500
},
{
"epoch": 2.66,
"learning_rate": 2.2792857498417756e-06,
"loss": 0.028,
"step": 124600
},
{
"epoch": 2.66,
"learning_rate": 2.265063325345072e-06,
"loss": 0.0287,
"step": 124700
},
{
"epoch": 2.66,
"learning_rate": 2.2508409008483677e-06,
"loss": 0.0313,
"step": 124800
},
{
"epoch": 2.66,
"learning_rate": 2.236618476351664e-06,
"loss": 0.0281,
"step": 124900
},
{
"epoch": 2.67,
"learning_rate": 2.2223960518549598e-06,
"loss": 0.0229,
"step": 125000
},
{
"epoch": 2.67,
"learning_rate": 2.208173627358256e-06,
"loss": 0.027,
"step": 125100
},
{
"epoch": 2.67,
"learning_rate": 2.193951202861552e-06,
"loss": 0.0279,
"step": 125200
},
{
"epoch": 2.67,
"learning_rate": 2.179728778364848e-06,
"loss": 0.0298,
"step": 125300
},
{
"epoch": 2.68,
"learning_rate": 2.165506353868144e-06,
"loss": 0.0295,
"step": 125400
},
{
"epoch": 2.68,
"learning_rate": 2.15128392937144e-06,
"loss": 0.0223,
"step": 125500
},
{
"epoch": 2.68,
"learning_rate": 2.1370615048747364e-06,
"loss": 0.0298,
"step": 125600
},
{
"epoch": 2.68,
"learning_rate": 2.122839080378032e-06,
"loss": 0.0322,
"step": 125700
},
{
"epoch": 2.68,
"learning_rate": 2.108616655881328e-06,
"loss": 0.0282,
"step": 125800
},
{
"epoch": 2.69,
"learning_rate": 2.0943942313846243e-06,
"loss": 0.0296,
"step": 125900
},
{
"epoch": 2.69,
"learning_rate": 2.0801718068879205e-06,
"loss": 0.0258,
"step": 126000
},
{
"epoch": 2.69,
"learning_rate": 2.0659493823912163e-06,
"loss": 0.0277,
"step": 126100
},
{
"epoch": 2.69,
"learning_rate": 2.051726957894512e-06,
"loss": 0.0285,
"step": 126200
},
{
"epoch": 2.69,
"learning_rate": 2.0375045333978084e-06,
"loss": 0.0314,
"step": 126300
},
{
"epoch": 2.7,
"learning_rate": 2.0232821089011046e-06,
"loss": 0.0312,
"step": 126400
},
{
"epoch": 2.7,
"learning_rate": 2.0090596844044005e-06,
"loss": 0.0285,
"step": 126500
},
{
"epoch": 2.7,
"learning_rate": 1.9948372599076967e-06,
"loss": 0.0291,
"step": 126600
},
{
"epoch": 2.7,
"learning_rate": 1.980614835410993e-06,
"loss": 0.0253,
"step": 126700
},
{
"epoch": 2.71,
"learning_rate": 1.9663924109142888e-06,
"loss": 0.0242,
"step": 126800
},
{
"epoch": 2.71,
"learning_rate": 1.9521699864175846e-06,
"loss": 0.0316,
"step": 126900
},
{
"epoch": 2.71,
"learning_rate": 1.937947561920881e-06,
"loss": 0.0305,
"step": 127000
},
{
"epoch": 2.71,
"learning_rate": 1.923725137424177e-06,
"loss": 0.0244,
"step": 127100
},
{
"epoch": 2.71,
"learning_rate": 1.909502712927473e-06,
"loss": 0.0263,
"step": 127200
},
{
"epoch": 2.72,
"learning_rate": 1.895280288430769e-06,
"loss": 0.0281,
"step": 127300
},
{
"epoch": 2.72,
"learning_rate": 1.881057863934065e-06,
"loss": 0.027,
"step": 127400
},
{
"epoch": 2.72,
"learning_rate": 1.8668354394373612e-06,
"loss": 0.0265,
"step": 127500
},
{
"epoch": 2.72,
"learning_rate": 1.852613014940657e-06,
"loss": 0.0302,
"step": 127600
},
{
"epoch": 2.72,
"learning_rate": 1.838390590443953e-06,
"loss": 0.0273,
"step": 127700
},
{
"epoch": 2.73,
"learning_rate": 1.824168165947249e-06,
"loss": 0.0243,
"step": 127800
},
{
"epoch": 2.73,
"learning_rate": 1.8099457414505453e-06,
"loss": 0.0246,
"step": 127900
},
{
"epoch": 2.73,
"learning_rate": 1.7957233169538414e-06,
"loss": 0.0246,
"step": 128000
},
{
"epoch": 2.73,
"learning_rate": 1.7815008924571372e-06,
"loss": 0.0307,
"step": 128100
},
{
"epoch": 2.73,
"learning_rate": 1.7672784679604332e-06,
"loss": 0.0255,
"step": 128200
},
{
"epoch": 2.74,
"learning_rate": 1.7530560434637295e-06,
"loss": 0.0231,
"step": 128300
},
{
"epoch": 2.74,
"learning_rate": 1.7388336189670255e-06,
"loss": 0.0261,
"step": 128400
},
{
"epoch": 2.74,
"learning_rate": 1.7246111944703215e-06,
"loss": 0.0277,
"step": 128500
},
{
"epoch": 2.74,
"learning_rate": 1.7103887699736173e-06,
"loss": 0.0289,
"step": 128600
},
{
"epoch": 2.75,
"learning_rate": 1.6961663454769136e-06,
"loss": 0.0272,
"step": 128700
},
{
"epoch": 2.75,
"learning_rate": 1.6819439209802096e-06,
"loss": 0.0304,
"step": 128800
},
{
"epoch": 2.75,
"learning_rate": 1.6677214964835057e-06,
"loss": 0.0253,
"step": 128900
},
{
"epoch": 2.75,
"learning_rate": 1.6534990719868017e-06,
"loss": 0.0289,
"step": 129000
},
{
"epoch": 2.75,
"learning_rate": 1.639276647490098e-06,
"loss": 0.0287,
"step": 129100
},
{
"epoch": 2.76,
"learning_rate": 1.6250542229933938e-06,
"loss": 0.0302,
"step": 129200
},
{
"epoch": 2.76,
"learning_rate": 1.6108317984966898e-06,
"loss": 0.0227,
"step": 129300
},
{
"epoch": 2.76,
"learning_rate": 1.5966093739999858e-06,
"loss": 0.0302,
"step": 129400
},
{
"epoch": 2.76,
"learning_rate": 1.582386949503282e-06,
"loss": 0.0242,
"step": 129500
},
{
"epoch": 2.76,
"learning_rate": 1.568164525006578e-06,
"loss": 0.0281,
"step": 129600
},
{
"epoch": 2.77,
"learning_rate": 1.553942100509874e-06,
"loss": 0.032,
"step": 129700
},
{
"epoch": 2.77,
"learning_rate": 1.53971967601317e-06,
"loss": 0.0251,
"step": 129800
},
{
"epoch": 2.77,
"learning_rate": 1.5254972515164662e-06,
"loss": 0.0262,
"step": 129900
},
{
"epoch": 2.77,
"learning_rate": 1.5112748270197622e-06,
"loss": 0.029,
"step": 130000
},
{
"epoch": 2.77,
"eval_loss": 0.12821832299232483,
"eval_runtime": 34.334,
"eval_samples_per_second": 145.628,
"eval_steps_per_second": 1.165,
"step": 130000
},
{
"epoch": 2.78,
"learning_rate": 1.4970524025230583e-06,
"loss": 0.0269,
"step": 130100
},
{
"epoch": 2.78,
"learning_rate": 1.482829978026354e-06,
"loss": 0.0303,
"step": 130200
},
{
"epoch": 2.78,
"learning_rate": 1.4686075535296503e-06,
"loss": 0.0309,
"step": 130300
},
{
"epoch": 2.78,
"learning_rate": 1.4543851290329464e-06,
"loss": 0.0262,
"step": 130400
},
{
"epoch": 2.78,
"learning_rate": 1.4401627045362424e-06,
"loss": 0.0252,
"step": 130500
},
{
"epoch": 2.79,
"learning_rate": 1.4259402800395384e-06,
"loss": 0.024,
"step": 130600
},
{
"epoch": 2.79,
"learning_rate": 1.4117178555428347e-06,
"loss": 0.0292,
"step": 130700
},
{
"epoch": 2.79,
"learning_rate": 1.3974954310461305e-06,
"loss": 0.0264,
"step": 130800
},
{
"epoch": 2.79,
"learning_rate": 1.3832730065494265e-06,
"loss": 0.0245,
"step": 130900
},
{
"epoch": 2.79,
"learning_rate": 1.3690505820527225e-06,
"loss": 0.0281,
"step": 131000
},
{
"epoch": 2.8,
"learning_rate": 1.3548281575560188e-06,
"loss": 0.0302,
"step": 131100
},
{
"epoch": 2.8,
"learning_rate": 1.3406057330593148e-06,
"loss": 0.0278,
"step": 131200
},
{
"epoch": 2.8,
"learning_rate": 1.3263833085626106e-06,
"loss": 0.0277,
"step": 131300
},
{
"epoch": 2.8,
"learning_rate": 1.3121608840659067e-06,
"loss": 0.0252,
"step": 131400
},
{
"epoch": 2.81,
"learning_rate": 1.297938459569203e-06,
"loss": 0.027,
"step": 131500
},
{
"epoch": 2.81,
"learning_rate": 1.283716035072499e-06,
"loss": 0.0276,
"step": 131600
},
{
"epoch": 2.81,
"learning_rate": 1.269493610575795e-06,
"loss": 0.0234,
"step": 131700
},
{
"epoch": 2.81,
"learning_rate": 1.2552711860790908e-06,
"loss": 0.0285,
"step": 131800
},
{
"epoch": 2.81,
"learning_rate": 1.241048761582387e-06,
"loss": 0.0298,
"step": 131900
},
{
"epoch": 2.82,
"learning_rate": 1.226826337085683e-06,
"loss": 0.027,
"step": 132000
},
{
"epoch": 2.82,
"learning_rate": 1.2126039125889791e-06,
"loss": 0.0309,
"step": 132100
},
{
"epoch": 2.82,
"learning_rate": 1.1983814880922751e-06,
"loss": 0.0257,
"step": 132200
},
{
"epoch": 2.82,
"learning_rate": 1.1841590635955712e-06,
"loss": 0.0263,
"step": 132300
},
{
"epoch": 2.82,
"learning_rate": 1.1699366390988672e-06,
"loss": 0.024,
"step": 132400
},
{
"epoch": 2.83,
"learning_rate": 1.1557142146021632e-06,
"loss": 0.0313,
"step": 132500
},
{
"epoch": 2.83,
"learning_rate": 1.1414917901054593e-06,
"loss": 0.0214,
"step": 132600
},
{
"epoch": 2.83,
"learning_rate": 1.1272693656087553e-06,
"loss": 0.027,
"step": 132700
},
{
"epoch": 2.83,
"learning_rate": 1.1130469411120516e-06,
"loss": 0.0286,
"step": 132800
},
{
"epoch": 2.84,
"learning_rate": 1.0988245166153474e-06,
"loss": 0.0242,
"step": 132900
},
{
"epoch": 2.84,
"learning_rate": 1.0846020921186436e-06,
"loss": 0.0314,
"step": 133000
},
{
"epoch": 2.84,
"learning_rate": 1.0703796676219394e-06,
"loss": 0.0247,
"step": 133100
},
{
"epoch": 2.84,
"learning_rate": 1.0561572431252357e-06,
"loss": 0.0281,
"step": 133200
},
{
"epoch": 2.84,
"learning_rate": 1.0419348186285317e-06,
"loss": 0.0214,
"step": 133300
},
{
"epoch": 2.85,
"learning_rate": 1.0277123941318277e-06,
"loss": 0.0313,
"step": 133400
},
{
"epoch": 2.85,
"learning_rate": 1.0134899696351238e-06,
"loss": 0.0291,
"step": 133500
},
{
"epoch": 2.85,
"learning_rate": 9.992675451384198e-07,
"loss": 0.0268,
"step": 133600
},
{
"epoch": 2.85,
"learning_rate": 9.850451206417158e-07,
"loss": 0.0285,
"step": 133700
},
{
"epoch": 2.85,
"learning_rate": 9.708226961450119e-07,
"loss": 0.0266,
"step": 133800
},
{
"epoch": 2.86,
"learning_rate": 9.56600271648308e-07,
"loss": 0.0266,
"step": 133900
},
{
"epoch": 2.86,
"learning_rate": 9.42377847151604e-07,
"loss": 0.0226,
"step": 134000
},
{
"epoch": 2.86,
"learning_rate": 9.281554226549e-07,
"loss": 0.0273,
"step": 134100
},
{
"epoch": 2.86,
"learning_rate": 9.139329981581961e-07,
"loss": 0.0278,
"step": 134200
},
{
"epoch": 2.87,
"learning_rate": 8.99710573661492e-07,
"loss": 0.031,
"step": 134300
},
{
"epoch": 2.87,
"learning_rate": 8.854881491647882e-07,
"loss": 0.0274,
"step": 134400
},
{
"epoch": 2.87,
"learning_rate": 8.712657246680842e-07,
"loss": 0.0245,
"step": 134500
},
{
"epoch": 2.87,
"learning_rate": 8.570433001713803e-07,
"loss": 0.0264,
"step": 134600
},
{
"epoch": 2.87,
"learning_rate": 8.428208756746763e-07,
"loss": 0.0314,
"step": 134700
},
{
"epoch": 2.88,
"learning_rate": 8.285984511779724e-07,
"loss": 0.0283,
"step": 134800
},
{
"epoch": 2.88,
"learning_rate": 8.143760266812683e-07,
"loss": 0.0281,
"step": 134900
},
{
"epoch": 2.88,
"learning_rate": 8.001536021845645e-07,
"loss": 0.0278,
"step": 135000
},
{
"epoch": 2.88,
"learning_rate": 7.859311776878604e-07,
"loss": 0.0334,
"step": 135100
},
{
"epoch": 2.88,
"learning_rate": 7.717087531911565e-07,
"loss": 0.0235,
"step": 135200
},
{
"epoch": 2.89,
"learning_rate": 7.574863286944526e-07,
"loss": 0.0337,
"step": 135300
},
{
"epoch": 2.89,
"learning_rate": 7.432639041977487e-07,
"loss": 0.0221,
"step": 135400
},
{
"epoch": 2.89,
"learning_rate": 7.290414797010446e-07,
"loss": 0.0293,
"step": 135500
},
{
"epoch": 2.89,
"learning_rate": 7.148190552043408e-07,
"loss": 0.0227,
"step": 135600
},
{
"epoch": 2.89,
"learning_rate": 7.005966307076367e-07,
"loss": 0.0248,
"step": 135700
},
{
"epoch": 2.9,
"learning_rate": 6.863742062109328e-07,
"loss": 0.0291,
"step": 135800
},
{
"epoch": 2.9,
"learning_rate": 6.721517817142289e-07,
"loss": 0.0261,
"step": 135900
},
{
"epoch": 2.9,
"learning_rate": 6.579293572175249e-07,
"loss": 0.029,
"step": 136000
},
{
"epoch": 2.9,
"learning_rate": 6.437069327208209e-07,
"loss": 0.0266,
"step": 136100
},
{
"epoch": 2.91,
"learning_rate": 6.294845082241171e-07,
"loss": 0.0277,
"step": 136200
},
{
"epoch": 2.91,
"learning_rate": 6.152620837274131e-07,
"loss": 0.0231,
"step": 136300
},
{
"epoch": 2.91,
"learning_rate": 6.010396592307091e-07,
"loss": 0.0284,
"step": 136400
},
{
"epoch": 2.91,
"learning_rate": 5.868172347340052e-07,
"loss": 0.0269,
"step": 136500
},
{
"epoch": 2.91,
"learning_rate": 5.725948102373012e-07,
"loss": 0.0254,
"step": 136600
},
{
"epoch": 2.92,
"learning_rate": 5.583723857405972e-07,
"loss": 0.0267,
"step": 136700
},
{
"epoch": 2.92,
"learning_rate": 5.441499612438933e-07,
"loss": 0.0254,
"step": 136800
},
{
"epoch": 2.92,
"learning_rate": 5.299275367471893e-07,
"loss": 0.0271,
"step": 136900
},
{
"epoch": 2.92,
"learning_rate": 5.157051122504854e-07,
"loss": 0.0281,
"step": 137000
},
{
"epoch": 2.92,
"learning_rate": 5.014826877537815e-07,
"loss": 0.0326,
"step": 137100
},
{
"epoch": 2.93,
"learning_rate": 4.872602632570775e-07,
"loss": 0.0275,
"step": 137200
},
{
"epoch": 2.93,
"learning_rate": 4.7303783876037353e-07,
"loss": 0.0312,
"step": 137300
},
{
"epoch": 2.93,
"learning_rate": 4.5881541426366957e-07,
"loss": 0.0265,
"step": 137400
},
{
"epoch": 2.93,
"learning_rate": 4.445929897669656e-07,
"loss": 0.0282,
"step": 137500
},
{
"epoch": 2.94,
"learning_rate": 4.303705652702617e-07,
"loss": 0.0279,
"step": 137600
},
{
"epoch": 2.94,
"learning_rate": 4.161481407735577e-07,
"loss": 0.0283,
"step": 137700
},
{
"epoch": 2.94,
"learning_rate": 4.0192571627685375e-07,
"loss": 0.0271,
"step": 137800
},
{
"epoch": 2.94,
"learning_rate": 3.877032917801498e-07,
"loss": 0.0297,
"step": 137900
},
{
"epoch": 2.94,
"learning_rate": 3.7348086728344587e-07,
"loss": 0.0288,
"step": 138000
},
{
"epoch": 2.95,
"learning_rate": 3.592584427867419e-07,
"loss": 0.0259,
"step": 138100
},
{
"epoch": 2.95,
"learning_rate": 3.4503601829003793e-07,
"loss": 0.0293,
"step": 138200
},
{
"epoch": 2.95,
"learning_rate": 3.3081359379333396e-07,
"loss": 0.0228,
"step": 138300
},
{
"epoch": 2.95,
"learning_rate": 3.1659116929663005e-07,
"loss": 0.0234,
"step": 138400
},
{
"epoch": 2.95,
"learning_rate": 3.023687447999261e-07,
"loss": 0.0228,
"step": 138500
},
{
"epoch": 2.96,
"learning_rate": 2.881463203032221e-07,
"loss": 0.0196,
"step": 138600
},
{
"epoch": 2.96,
"learning_rate": 2.7392389580651815e-07,
"loss": 0.0264,
"step": 138700
},
{
"epoch": 2.96,
"learning_rate": 2.5970147130981423e-07,
"loss": 0.029,
"step": 138800
},
{
"epoch": 2.96,
"learning_rate": 2.4547904681311026e-07,
"loss": 0.0236,
"step": 138900
},
{
"epoch": 2.97,
"learning_rate": 2.312566223164063e-07,
"loss": 0.0216,
"step": 139000
},
{
"epoch": 2.97,
"learning_rate": 2.1703419781970235e-07,
"loss": 0.0275,
"step": 139100
},
{
"epoch": 2.97,
"learning_rate": 2.0281177332299839e-07,
"loss": 0.0243,
"step": 139200
},
{
"epoch": 2.97,
"learning_rate": 1.8858934882629444e-07,
"loss": 0.027,
"step": 139300
},
{
"epoch": 2.97,
"learning_rate": 1.7436692432959048e-07,
"loss": 0.033,
"step": 139400
},
{
"epoch": 2.98,
"learning_rate": 1.6014449983288654e-07,
"loss": 0.0267,
"step": 139500
},
{
"epoch": 2.98,
"learning_rate": 1.4592207533618257e-07,
"loss": 0.0283,
"step": 139600
},
{
"epoch": 2.98,
"learning_rate": 1.3169965083947863e-07,
"loss": 0.0244,
"step": 139700
},
{
"epoch": 2.98,
"learning_rate": 1.1747722634277467e-07,
"loss": 0.03,
"step": 139800
},
{
"epoch": 2.98,
"learning_rate": 1.0325480184607072e-07,
"loss": 0.0277,
"step": 139900
},
{
"epoch": 2.99,
"learning_rate": 8.903237734936676e-08,
"loss": 0.03,
"step": 140000
},
{
"epoch": 2.99,
"eval_loss": 0.1257346123456955,
"eval_runtime": 34.2432,
"eval_samples_per_second": 146.014,
"eval_steps_per_second": 1.168,
"step": 140000
}
],
"logging_steps": 100,
"max_steps": 140625,
"num_train_epochs": 3,
"save_steps": 10000,
"total_flos": 3822663594147840.0,
"trial_name": null,
"trial_params": null
}