José Ángel González
add model
9e49bf2
{
"best_metric": 1.4905033111572266,
"best_model_checkpoint": "./checkpoints/mbarthez-davide_articles-copy_enhanced/checkpoint-100656",
"epoch": 3.0,
"global_step": 100656,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.9971685741535528e-05,
"loss": 2.9916,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 2.9941881258941343e-05,
"loss": 2.4185,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 2.9912374821173107e-05,
"loss": 2.336,
"step": 300
},
{
"epoch": 0.01,
"learning_rate": 2.9882570338578922e-05,
"loss": 2.2868,
"step": 400
},
{
"epoch": 0.01,
"learning_rate": 2.985276585598474e-05,
"loss": 2.2529,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 2.98232594182165e-05,
"loss": 2.2467,
"step": 600
},
{
"epoch": 0.02,
"learning_rate": 2.9793454935622317e-05,
"loss": 2.1735,
"step": 700
},
{
"epoch": 0.02,
"learning_rate": 2.9763650453028136e-05,
"loss": 2.2049,
"step": 800
},
{
"epoch": 0.03,
"learning_rate": 2.9734442060085837e-05,
"loss": 2.1168,
"step": 900
},
{
"epoch": 0.03,
"learning_rate": 2.9704637577491656e-05,
"loss": 2.1353,
"step": 1000
},
{
"epoch": 0.03,
"learning_rate": 2.9674833094897475e-05,
"loss": 2.0458,
"step": 1100
},
{
"epoch": 0.04,
"learning_rate": 2.964502861230329e-05,
"loss": 2.1209,
"step": 1200
},
{
"epoch": 0.04,
"learning_rate": 2.961552217453505e-05,
"loss": 2.0867,
"step": 1300
},
{
"epoch": 0.04,
"learning_rate": 2.958571769194087e-05,
"loss": 2.028,
"step": 1400
},
{
"epoch": 0.04,
"learning_rate": 2.9555913209346685e-05,
"loss": 2.0236,
"step": 1500
},
{
"epoch": 0.05,
"learning_rate": 2.9526108726752504e-05,
"loss": 2.1211,
"step": 1600
},
{
"epoch": 0.05,
"learning_rate": 2.9496602288984264e-05,
"loss": 2.0742,
"step": 1700
},
{
"epoch": 0.05,
"learning_rate": 2.9466797806390083e-05,
"loss": 2.0753,
"step": 1800
},
{
"epoch": 0.06,
"learning_rate": 2.94369933237959e-05,
"loss": 2.0278,
"step": 1900
},
{
"epoch": 0.06,
"learning_rate": 2.9407188841201717e-05,
"loss": 2.0408,
"step": 2000
},
{
"epoch": 0.06,
"learning_rate": 2.9377384358607533e-05,
"loss": 2.0124,
"step": 2100
},
{
"epoch": 0.07,
"learning_rate": 2.9347579876013352e-05,
"loss": 1.9951,
"step": 2200
},
{
"epoch": 0.07,
"learning_rate": 2.931777539341917e-05,
"loss": 1.9751,
"step": 2300
},
{
"epoch": 0.07,
"learning_rate": 2.928826895565093e-05,
"loss": 2.0204,
"step": 2400
},
{
"epoch": 0.07,
"learning_rate": 2.925846447305675e-05,
"loss": 1.9919,
"step": 2500
},
{
"epoch": 0.08,
"learning_rate": 2.9228659990462566e-05,
"loss": 2.0126,
"step": 2600
},
{
"epoch": 0.08,
"learning_rate": 2.919885550786838e-05,
"loss": 2.0276,
"step": 2700
},
{
"epoch": 0.08,
"learning_rate": 2.91690510252742e-05,
"loss": 2.0272,
"step": 2800
},
{
"epoch": 0.09,
"learning_rate": 2.913924654268002e-05,
"loss": 1.9608,
"step": 2900
},
{
"epoch": 0.09,
"learning_rate": 2.910944206008584e-05,
"loss": 2.0019,
"step": 3000
},
{
"epoch": 0.09,
"learning_rate": 2.9079637577491654e-05,
"loss": 1.9454,
"step": 3100
},
{
"epoch": 0.1,
"learning_rate": 2.9049833094897473e-05,
"loss": 1.9716,
"step": 3200
},
{
"epoch": 0.1,
"learning_rate": 2.9020028612303292e-05,
"loss": 1.9543,
"step": 3300
},
{
"epoch": 0.1,
"learning_rate": 2.8990224129709108e-05,
"loss": 1.9659,
"step": 3400
},
{
"epoch": 0.1,
"learning_rate": 2.8960419647114927e-05,
"loss": 1.9729,
"step": 3500
},
{
"epoch": 0.11,
"learning_rate": 2.8930615164520746e-05,
"loss": 1.992,
"step": 3600
},
{
"epoch": 0.11,
"learning_rate": 2.8900810681926565e-05,
"loss": 1.9147,
"step": 3700
},
{
"epoch": 0.11,
"learning_rate": 2.887100619933238e-05,
"loss": 1.8892,
"step": 3800
},
{
"epoch": 0.12,
"learning_rate": 2.88412017167382e-05,
"loss": 1.941,
"step": 3900
},
{
"epoch": 0.12,
"learning_rate": 2.881139723414402e-05,
"loss": 1.9463,
"step": 4000
},
{
"epoch": 0.12,
"learning_rate": 2.878159275154983e-05,
"loss": 1.9645,
"step": 4100
},
{
"epoch": 0.13,
"learning_rate": 2.875178826895565e-05,
"loss": 1.9414,
"step": 4200
},
{
"epoch": 0.13,
"learning_rate": 2.872198378636147e-05,
"loss": 1.9317,
"step": 4300
},
{
"epoch": 0.13,
"learning_rate": 2.8692179303767288e-05,
"loss": 1.8677,
"step": 4400
},
{
"epoch": 0.13,
"learning_rate": 2.8662374821173103e-05,
"loss": 1.9439,
"step": 4500
},
{
"epoch": 0.14,
"learning_rate": 2.8632570338578922e-05,
"loss": 1.8576,
"step": 4600
},
{
"epoch": 0.14,
"learning_rate": 2.860276585598474e-05,
"loss": 1.9448,
"step": 4700
},
{
"epoch": 0.14,
"learning_rate": 2.8572961373390557e-05,
"loss": 1.8824,
"step": 4800
},
{
"epoch": 0.15,
"learning_rate": 2.8543156890796376e-05,
"loss": 1.9521,
"step": 4900
},
{
"epoch": 0.15,
"learning_rate": 2.8513352408202195e-05,
"loss": 1.9182,
"step": 5000
},
{
"epoch": 0.15,
"learning_rate": 2.8483547925608014e-05,
"loss": 1.941,
"step": 5100
},
{
"epoch": 0.15,
"learning_rate": 2.845374344301383e-05,
"loss": 1.9335,
"step": 5200
},
{
"epoch": 0.16,
"learning_rate": 2.842393896041965e-05,
"loss": 1.9182,
"step": 5300
},
{
"epoch": 0.16,
"learning_rate": 2.8394134477825468e-05,
"loss": 1.9045,
"step": 5400
},
{
"epoch": 0.16,
"learning_rate": 2.8364329995231287e-05,
"loss": 1.8742,
"step": 5500
},
{
"epoch": 0.17,
"learning_rate": 2.83345255126371e-05,
"loss": 1.8348,
"step": 5600
},
{
"epoch": 0.17,
"learning_rate": 2.8304721030042918e-05,
"loss": 1.8538,
"step": 5700
},
{
"epoch": 0.17,
"learning_rate": 2.8274916547448737e-05,
"loss": 1.8849,
"step": 5800
},
{
"epoch": 0.18,
"learning_rate": 2.8245112064854553e-05,
"loss": 1.9078,
"step": 5900
},
{
"epoch": 0.18,
"learning_rate": 2.821530758226037e-05,
"loss": 1.8603,
"step": 6000
},
{
"epoch": 0.18,
"learning_rate": 2.818550309966619e-05,
"loss": 1.8936,
"step": 6100
},
{
"epoch": 0.18,
"learning_rate": 2.815569861707201e-05,
"loss": 1.8688,
"step": 6200
},
{
"epoch": 0.19,
"learning_rate": 2.8125894134477825e-05,
"loss": 1.8588,
"step": 6300
},
{
"epoch": 0.19,
"learning_rate": 2.8096089651883644e-05,
"loss": 1.8781,
"step": 6400
},
{
"epoch": 0.19,
"learning_rate": 2.8066285169289463e-05,
"loss": 1.8708,
"step": 6500
},
{
"epoch": 0.2,
"learning_rate": 2.803648068669528e-05,
"loss": 1.8755,
"step": 6600
},
{
"epoch": 0.2,
"learning_rate": 2.8006676204101098e-05,
"loss": 1.9063,
"step": 6700
},
{
"epoch": 0.2,
"learning_rate": 2.7976871721506917e-05,
"loss": 1.8432,
"step": 6800
},
{
"epoch": 0.21,
"learning_rate": 2.7947067238912736e-05,
"loss": 1.8905,
"step": 6900
},
{
"epoch": 0.21,
"learning_rate": 2.791726275631855e-05,
"loss": 1.8297,
"step": 7000
},
{
"epoch": 0.21,
"learning_rate": 2.7887458273724367e-05,
"loss": 1.8125,
"step": 7100
},
{
"epoch": 0.21,
"learning_rate": 2.7857653791130186e-05,
"loss": 1.8585,
"step": 7200
},
{
"epoch": 0.22,
"learning_rate": 2.7827849308536002e-05,
"loss": 1.8668,
"step": 7300
},
{
"epoch": 0.22,
"learning_rate": 2.779804482594182e-05,
"loss": 1.8574,
"step": 7400
},
{
"epoch": 0.22,
"learning_rate": 2.776824034334764e-05,
"loss": 1.8183,
"step": 7500
},
{
"epoch": 0.23,
"learning_rate": 2.773843586075346e-05,
"loss": 1.8725,
"step": 7600
},
{
"epoch": 0.23,
"learning_rate": 2.7708631378159275e-05,
"loss": 1.8529,
"step": 7700
},
{
"epoch": 0.23,
"learning_rate": 2.7678826895565094e-05,
"loss": 1.8526,
"step": 7800
},
{
"epoch": 0.24,
"learning_rate": 2.7649022412970913e-05,
"loss": 1.8428,
"step": 7900
},
{
"epoch": 0.24,
"learning_rate": 2.7619217930376728e-05,
"loss": 1.845,
"step": 8000
},
{
"epoch": 0.24,
"learning_rate": 2.7589711492608488e-05,
"loss": 1.8325,
"step": 8100
},
{
"epoch": 0.24,
"learning_rate": 2.7559907010014307e-05,
"loss": 1.821,
"step": 8200
},
{
"epoch": 0.25,
"learning_rate": 2.7530400572246067e-05,
"loss": 1.9171,
"step": 8300
},
{
"epoch": 0.25,
"learning_rate": 2.7500596089651886e-05,
"loss": 1.868,
"step": 8400
},
{
"epoch": 0.25,
"learning_rate": 2.7470791607057705e-05,
"loss": 1.8178,
"step": 8500
},
{
"epoch": 0.26,
"learning_rate": 2.7440987124463517e-05,
"loss": 1.8377,
"step": 8600
},
{
"epoch": 0.26,
"learning_rate": 2.7411182641869336e-05,
"loss": 1.8537,
"step": 8700
},
{
"epoch": 0.26,
"learning_rate": 2.7381378159275155e-05,
"loss": 1.845,
"step": 8800
},
{
"epoch": 0.27,
"learning_rate": 2.735157367668097e-05,
"loss": 1.8144,
"step": 8900
},
{
"epoch": 0.27,
"learning_rate": 2.732176919408679e-05,
"loss": 1.8261,
"step": 9000
},
{
"epoch": 0.27,
"learning_rate": 2.729196471149261e-05,
"loss": 1.8569,
"step": 9100
},
{
"epoch": 0.27,
"learning_rate": 2.7262160228898428e-05,
"loss": 1.8529,
"step": 9200
},
{
"epoch": 0.28,
"learning_rate": 2.7232355746304244e-05,
"loss": 1.7626,
"step": 9300
},
{
"epoch": 0.28,
"learning_rate": 2.7202551263710063e-05,
"loss": 1.8715,
"step": 9400
},
{
"epoch": 0.28,
"learning_rate": 2.7172746781115882e-05,
"loss": 1.7882,
"step": 9500
},
{
"epoch": 0.29,
"learning_rate": 2.7142942298521697e-05,
"loss": 1.8134,
"step": 9600
},
{
"epoch": 0.29,
"learning_rate": 2.7113137815927516e-05,
"loss": 1.7804,
"step": 9700
},
{
"epoch": 0.29,
"learning_rate": 2.7083333333333335e-05,
"loss": 1.8603,
"step": 9800
},
{
"epoch": 0.3,
"learning_rate": 2.7053826895565092e-05,
"loss": 1.7646,
"step": 9900
},
{
"epoch": 0.3,
"learning_rate": 2.702402241297091e-05,
"loss": 1.8108,
"step": 10000
},
{
"epoch": 0.3,
"learning_rate": 2.699421793037673e-05,
"loss": 1.7624,
"step": 10100
},
{
"epoch": 0.3,
"learning_rate": 2.696441344778255e-05,
"loss": 1.746,
"step": 10200
},
{
"epoch": 0.31,
"learning_rate": 2.6934608965188365e-05,
"loss": 1.7821,
"step": 10300
},
{
"epoch": 0.31,
"learning_rate": 2.6904804482594184e-05,
"loss": 1.8216,
"step": 10400
},
{
"epoch": 0.31,
"learning_rate": 2.6875000000000003e-05,
"loss": 1.8033,
"step": 10500
},
{
"epoch": 0.32,
"learning_rate": 2.684519551740582e-05,
"loss": 1.7954,
"step": 10600
},
{
"epoch": 0.32,
"learning_rate": 2.6815391034811637e-05,
"loss": 1.8692,
"step": 10700
},
{
"epoch": 0.32,
"learning_rate": 2.6785586552217456e-05,
"loss": 1.789,
"step": 10800
},
{
"epoch": 0.32,
"learning_rate": 2.6755782069623272e-05,
"loss": 1.7989,
"step": 10900
},
{
"epoch": 0.33,
"learning_rate": 2.6726275631855032e-05,
"loss": 1.7977,
"step": 11000
},
{
"epoch": 0.33,
"learning_rate": 2.669647114926085e-05,
"loss": 1.7815,
"step": 11100
},
{
"epoch": 0.33,
"learning_rate": 2.6666666666666667e-05,
"loss": 1.7803,
"step": 11200
},
{
"epoch": 0.34,
"learning_rate": 2.6636862184072486e-05,
"loss": 1.7956,
"step": 11300
},
{
"epoch": 0.34,
"learning_rate": 2.6607057701478305e-05,
"loss": 1.8058,
"step": 11400
},
{
"epoch": 0.34,
"learning_rate": 2.6577253218884124e-05,
"loss": 1.8262,
"step": 11500
},
{
"epoch": 0.35,
"learning_rate": 2.654744873628994e-05,
"loss": 1.8004,
"step": 11600
},
{
"epoch": 0.35,
"learning_rate": 2.6517644253695755e-05,
"loss": 1.7565,
"step": 11700
},
{
"epoch": 0.35,
"learning_rate": 2.6487839771101574e-05,
"loss": 1.8006,
"step": 11800
},
{
"epoch": 0.35,
"learning_rate": 2.645803528850739e-05,
"loss": 1.8004,
"step": 11900
},
{
"epoch": 0.36,
"learning_rate": 2.642823080591321e-05,
"loss": 1.7985,
"step": 12000
},
{
"epoch": 0.36,
"learning_rate": 2.6398426323319028e-05,
"loss": 1.7983,
"step": 12100
},
{
"epoch": 0.36,
"learning_rate": 2.6368621840724847e-05,
"loss": 1.7755,
"step": 12200
},
{
"epoch": 0.37,
"learning_rate": 2.6338817358130662e-05,
"loss": 1.8261,
"step": 12300
},
{
"epoch": 0.37,
"learning_rate": 2.630901287553648e-05,
"loss": 1.8139,
"step": 12400
},
{
"epoch": 0.37,
"learning_rate": 2.62792083929423e-05,
"loss": 1.7813,
"step": 12500
},
{
"epoch": 0.38,
"learning_rate": 2.6249403910348116e-05,
"loss": 1.7667,
"step": 12600
},
{
"epoch": 0.38,
"learning_rate": 2.6219599427753935e-05,
"loss": 1.7622,
"step": 12700
},
{
"epoch": 0.38,
"learning_rate": 2.6189794945159754e-05,
"loss": 1.7805,
"step": 12800
},
{
"epoch": 0.38,
"learning_rate": 2.6159990462565573e-05,
"loss": 1.7998,
"step": 12900
},
{
"epoch": 0.39,
"learning_rate": 2.613018597997139e-05,
"loss": 1.7419,
"step": 13000
},
{
"epoch": 0.39,
"learning_rate": 2.6100381497377204e-05,
"loss": 1.7531,
"step": 13100
},
{
"epoch": 0.39,
"learning_rate": 2.6070577014783023e-05,
"loss": 1.7852,
"step": 13200
},
{
"epoch": 0.4,
"learning_rate": 2.6040772532188842e-05,
"loss": 1.7967,
"step": 13300
},
{
"epoch": 0.4,
"learning_rate": 2.6010968049594658e-05,
"loss": 1.7887,
"step": 13400
},
{
"epoch": 0.4,
"learning_rate": 2.5981163567000477e-05,
"loss": 1.7465,
"step": 13500
},
{
"epoch": 0.41,
"learning_rate": 2.5951359084406296e-05,
"loss": 1.7776,
"step": 13600
},
{
"epoch": 0.41,
"learning_rate": 2.592155460181211e-05,
"loss": 1.7584,
"step": 13700
},
{
"epoch": 0.41,
"learning_rate": 2.589175011921793e-05,
"loss": 1.7761,
"step": 13800
},
{
"epoch": 0.41,
"learning_rate": 2.586194563662375e-05,
"loss": 1.7238,
"step": 13900
},
{
"epoch": 0.42,
"learning_rate": 2.5832439198855506e-05,
"loss": 1.7991,
"step": 14000
},
{
"epoch": 0.42,
"learning_rate": 2.5802634716261325e-05,
"loss": 1.7551,
"step": 14100
},
{
"epoch": 0.42,
"learning_rate": 2.5772830233667144e-05,
"loss": 1.8406,
"step": 14200
},
{
"epoch": 0.43,
"learning_rate": 2.574302575107296e-05,
"loss": 1.7478,
"step": 14300
},
{
"epoch": 0.43,
"learning_rate": 2.571322126847878e-05,
"loss": 1.8065,
"step": 14400
},
{
"epoch": 0.43,
"learning_rate": 2.5683416785884598e-05,
"loss": 1.8075,
"step": 14500
},
{
"epoch": 0.44,
"learning_rate": 2.5653612303290417e-05,
"loss": 1.7356,
"step": 14600
},
{
"epoch": 0.44,
"learning_rate": 2.5623807820696232e-05,
"loss": 1.7674,
"step": 14700
},
{
"epoch": 0.44,
"learning_rate": 2.559400333810205e-05,
"loss": 1.7419,
"step": 14800
},
{
"epoch": 0.44,
"learning_rate": 2.556419885550787e-05,
"loss": 1.7297,
"step": 14900
},
{
"epoch": 0.45,
"learning_rate": 2.5534394372913686e-05,
"loss": 1.7576,
"step": 15000
},
{
"epoch": 0.45,
"learning_rate": 2.5504589890319505e-05,
"loss": 1.8,
"step": 15100
},
{
"epoch": 0.45,
"learning_rate": 2.5474785407725324e-05,
"loss": 1.7792,
"step": 15200
},
{
"epoch": 0.46,
"learning_rate": 2.5444980925131143e-05,
"loss": 1.739,
"step": 15300
},
{
"epoch": 0.46,
"learning_rate": 2.5415176442536955e-05,
"loss": 1.7421,
"step": 15400
},
{
"epoch": 0.46,
"learning_rate": 2.5385371959942774e-05,
"loss": 1.7634,
"step": 15500
},
{
"epoch": 0.46,
"learning_rate": 2.5355567477348593e-05,
"loss": 1.8138,
"step": 15600
},
{
"epoch": 0.47,
"learning_rate": 2.5325762994754413e-05,
"loss": 1.6962,
"step": 15700
},
{
"epoch": 0.47,
"learning_rate": 2.5295958512160228e-05,
"loss": 1.7461,
"step": 15800
},
{
"epoch": 0.47,
"learning_rate": 2.5266154029566047e-05,
"loss": 1.7672,
"step": 15900
},
{
"epoch": 0.48,
"learning_rate": 2.5236647591797807e-05,
"loss": 1.7314,
"step": 16000
},
{
"epoch": 0.48,
"learning_rate": 2.5206843109203626e-05,
"loss": 1.7327,
"step": 16100
},
{
"epoch": 0.48,
"learning_rate": 2.5177038626609442e-05,
"loss": 1.702,
"step": 16200
},
{
"epoch": 0.49,
"learning_rate": 2.514723414401526e-05,
"loss": 1.7768,
"step": 16300
},
{
"epoch": 0.49,
"learning_rate": 2.5117429661421076e-05,
"loss": 1.7364,
"step": 16400
},
{
"epoch": 0.49,
"learning_rate": 2.5087625178826895e-05,
"loss": 1.7584,
"step": 16500
},
{
"epoch": 0.49,
"learning_rate": 2.5057820696232714e-05,
"loss": 1.7383,
"step": 16600
},
{
"epoch": 0.5,
"learning_rate": 2.502801621363853e-05,
"loss": 1.7418,
"step": 16700
},
{
"epoch": 0.5,
"learning_rate": 2.499821173104435e-05,
"loss": 1.6813,
"step": 16800
},
{
"epoch": 0.5,
"learning_rate": 2.4968407248450168e-05,
"loss": 1.7484,
"step": 16900
},
{
"epoch": 0.51,
"learning_rate": 2.4938900810681925e-05,
"loss": 1.7323,
"step": 17000
},
{
"epoch": 0.51,
"learning_rate": 2.4909096328087744e-05,
"loss": 1.7436,
"step": 17100
},
{
"epoch": 0.51,
"learning_rate": 2.4879291845493563e-05,
"loss": 1.7571,
"step": 17200
},
{
"epoch": 0.52,
"learning_rate": 2.4849487362899378e-05,
"loss": 1.7422,
"step": 17300
},
{
"epoch": 0.52,
"learning_rate": 2.4819682880305197e-05,
"loss": 1.783,
"step": 17400
},
{
"epoch": 0.52,
"learning_rate": 2.4789878397711016e-05,
"loss": 1.7675,
"step": 17500
},
{
"epoch": 0.52,
"learning_rate": 2.4760073915116835e-05,
"loss": 1.7611,
"step": 17600
},
{
"epoch": 0.53,
"learning_rate": 2.473026943252265e-05,
"loss": 1.7964,
"step": 17700
},
{
"epoch": 0.53,
"learning_rate": 2.470046494992847e-05,
"loss": 1.681,
"step": 17800
},
{
"epoch": 0.53,
"learning_rate": 2.467066046733429e-05,
"loss": 1.6863,
"step": 17900
},
{
"epoch": 0.54,
"learning_rate": 2.4640855984740108e-05,
"loss": 1.725,
"step": 18000
},
{
"epoch": 0.54,
"learning_rate": 2.4611051502145924e-05,
"loss": 1.7158,
"step": 18100
},
{
"epoch": 0.54,
"learning_rate": 2.4581247019551743e-05,
"loss": 1.7756,
"step": 18200
},
{
"epoch": 0.55,
"learning_rate": 2.455144253695756e-05,
"loss": 1.7627,
"step": 18300
},
{
"epoch": 0.55,
"learning_rate": 2.4521638054363374e-05,
"loss": 1.7381,
"step": 18400
},
{
"epoch": 0.55,
"learning_rate": 2.4491833571769193e-05,
"loss": 1.7606,
"step": 18500
},
{
"epoch": 0.55,
"learning_rate": 2.4462029089175012e-05,
"loss": 1.7298,
"step": 18600
},
{
"epoch": 0.56,
"learning_rate": 2.443222460658083e-05,
"loss": 1.7116,
"step": 18700
},
{
"epoch": 0.56,
"learning_rate": 2.4402420123986647e-05,
"loss": 1.775,
"step": 18800
},
{
"epoch": 0.56,
"learning_rate": 2.4372615641392466e-05,
"loss": 1.7324,
"step": 18900
},
{
"epoch": 0.57,
"learning_rate": 2.4342811158798285e-05,
"loss": 1.7449,
"step": 19000
},
{
"epoch": 0.57,
"learning_rate": 2.43130066762041e-05,
"loss": 1.7271,
"step": 19100
},
{
"epoch": 0.57,
"learning_rate": 2.428320219360992e-05,
"loss": 1.7374,
"step": 19200
},
{
"epoch": 0.58,
"learning_rate": 2.425339771101574e-05,
"loss": 1.7579,
"step": 19300
},
{
"epoch": 0.58,
"learning_rate": 2.4223593228421557e-05,
"loss": 1.7903,
"step": 19400
},
{
"epoch": 0.58,
"learning_rate": 2.4193788745827373e-05,
"loss": 1.7057,
"step": 19500
},
{
"epoch": 0.58,
"learning_rate": 2.4163984263233192e-05,
"loss": 1.7083,
"step": 19600
},
{
"epoch": 0.59,
"learning_rate": 2.413417978063901e-05,
"loss": 1.7556,
"step": 19700
},
{
"epoch": 0.59,
"learning_rate": 2.4104375298044827e-05,
"loss": 1.6617,
"step": 19800
},
{
"epoch": 0.59,
"learning_rate": 2.4074570815450642e-05,
"loss": 1.7244,
"step": 19900
},
{
"epoch": 0.6,
"learning_rate": 2.404476633285646e-05,
"loss": 1.7108,
"step": 20000
},
{
"epoch": 0.6,
"learning_rate": 2.401496185026228e-05,
"loss": 1.7411,
"step": 20100
},
{
"epoch": 0.6,
"learning_rate": 2.3985157367668096e-05,
"loss": 1.735,
"step": 20200
},
{
"epoch": 0.61,
"learning_rate": 2.3955352885073915e-05,
"loss": 1.7065,
"step": 20300
},
{
"epoch": 0.61,
"learning_rate": 2.3925548402479734e-05,
"loss": 1.7175,
"step": 20400
},
{
"epoch": 0.61,
"learning_rate": 2.389574391988555e-05,
"loss": 1.6863,
"step": 20500
},
{
"epoch": 0.61,
"learning_rate": 2.386593943729137e-05,
"loss": 1.6851,
"step": 20600
},
{
"epoch": 0.62,
"learning_rate": 2.3836134954697188e-05,
"loss": 1.6974,
"step": 20700
},
{
"epoch": 0.62,
"learning_rate": 2.3806330472103007e-05,
"loss": 1.7211,
"step": 20800
},
{
"epoch": 0.62,
"learning_rate": 2.3776525989508822e-05,
"loss": 1.7042,
"step": 20900
},
{
"epoch": 0.63,
"learning_rate": 2.374672150691464e-05,
"loss": 1.7337,
"step": 21000
},
{
"epoch": 0.63,
"learning_rate": 2.371691702432046e-05,
"loss": 1.7224,
"step": 21100
},
{
"epoch": 0.63,
"learning_rate": 2.3687112541726276e-05,
"loss": 1.6624,
"step": 21200
},
{
"epoch": 0.63,
"learning_rate": 2.3657308059132095e-05,
"loss": 1.6704,
"step": 21300
},
{
"epoch": 0.64,
"learning_rate": 2.362750357653791e-05,
"loss": 1.6547,
"step": 21400
},
{
"epoch": 0.64,
"learning_rate": 2.359769909394373e-05,
"loss": 1.6913,
"step": 21500
},
{
"epoch": 0.64,
"learning_rate": 2.3567894611349545e-05,
"loss": 1.7347,
"step": 21600
},
{
"epoch": 0.65,
"learning_rate": 2.3538090128755364e-05,
"loss": 1.6806,
"step": 21700
},
{
"epoch": 0.65,
"learning_rate": 2.3508285646161183e-05,
"loss": 1.6834,
"step": 21800
},
{
"epoch": 0.65,
"learning_rate": 2.3478481163567002e-05,
"loss": 1.7367,
"step": 21900
},
{
"epoch": 0.66,
"learning_rate": 2.3448676680972818e-05,
"loss": 1.6755,
"step": 22000
},
{
"epoch": 0.66,
"learning_rate": 2.3418872198378637e-05,
"loss": 1.7169,
"step": 22100
},
{
"epoch": 0.66,
"learning_rate": 2.3389067715784456e-05,
"loss": 1.7314,
"step": 22200
},
{
"epoch": 0.66,
"learning_rate": 2.335926323319027e-05,
"loss": 1.7077,
"step": 22300
},
{
"epoch": 0.67,
"learning_rate": 2.332945875059609e-05,
"loss": 1.7225,
"step": 22400
},
{
"epoch": 0.67,
"learning_rate": 2.329965426800191e-05,
"loss": 1.6787,
"step": 22500
},
{
"epoch": 0.67,
"learning_rate": 2.326984978540773e-05,
"loss": 1.6933,
"step": 22600
},
{
"epoch": 0.68,
"learning_rate": 2.3240045302813544e-05,
"loss": 1.7114,
"step": 22700
},
{
"epoch": 0.68,
"learning_rate": 2.3210240820219363e-05,
"loss": 1.7037,
"step": 22800
},
{
"epoch": 0.68,
"learning_rate": 2.318043633762518e-05,
"loss": 1.7217,
"step": 22900
},
{
"epoch": 0.69,
"learning_rate": 2.3150631855030995e-05,
"loss": 1.7062,
"step": 23000
},
{
"epoch": 0.69,
"learning_rate": 2.3120827372436814e-05,
"loss": 1.6969,
"step": 23100
},
{
"epoch": 0.69,
"learning_rate": 2.3091022889842633e-05,
"loss": 1.693,
"step": 23200
},
{
"epoch": 0.69,
"learning_rate": 2.306121840724845e-05,
"loss": 1.6718,
"step": 23300
},
{
"epoch": 0.7,
"learning_rate": 2.3031413924654267e-05,
"loss": 1.6782,
"step": 23400
},
{
"epoch": 0.7,
"learning_rate": 2.3001609442060086e-05,
"loss": 1.6719,
"step": 23500
},
{
"epoch": 0.7,
"learning_rate": 2.2971804959465905e-05,
"loss": 1.6695,
"step": 23600
},
{
"epoch": 0.71,
"learning_rate": 2.294200047687172e-05,
"loss": 1.6336,
"step": 23700
},
{
"epoch": 0.71,
"learning_rate": 2.291219599427754e-05,
"loss": 1.6949,
"step": 23800
},
{
"epoch": 0.71,
"learning_rate": 2.288239151168336e-05,
"loss": 1.6912,
"step": 23900
},
{
"epoch": 0.72,
"learning_rate": 2.2852587029089178e-05,
"loss": 1.6374,
"step": 24000
},
{
"epoch": 0.72,
"learning_rate": 2.2822782546494994e-05,
"loss": 1.6663,
"step": 24100
},
{
"epoch": 0.72,
"learning_rate": 2.2792978063900813e-05,
"loss": 1.7008,
"step": 24200
},
{
"epoch": 0.72,
"learning_rate": 2.276317358130663e-05,
"loss": 1.6883,
"step": 24300
},
{
"epoch": 0.73,
"learning_rate": 2.2733369098712444e-05,
"loss": 1.6593,
"step": 24400
},
{
"epoch": 0.73,
"learning_rate": 2.2703564616118263e-05,
"loss": 1.6847,
"step": 24500
},
{
"epoch": 0.73,
"learning_rate": 2.2673760133524082e-05,
"loss": 1.6684,
"step": 24600
},
{
"epoch": 0.74,
"learning_rate": 2.26439556509299e-05,
"loss": 1.6365,
"step": 24700
},
{
"epoch": 0.74,
"learning_rate": 2.2614151168335717e-05,
"loss": 1.6649,
"step": 24800
},
{
"epoch": 0.74,
"learning_rate": 2.2584346685741536e-05,
"loss": 1.6906,
"step": 24900
},
{
"epoch": 0.75,
"learning_rate": 2.2554542203147355e-05,
"loss": 1.6493,
"step": 25000
},
{
"epoch": 0.75,
"learning_rate": 2.252473772055317e-05,
"loss": 1.6709,
"step": 25100
},
{
"epoch": 0.75,
"learning_rate": 2.249493323795899e-05,
"loss": 1.6905,
"step": 25200
},
{
"epoch": 0.75,
"learning_rate": 2.2465128755364808e-05,
"loss": 1.6655,
"step": 25300
},
{
"epoch": 0.76,
"learning_rate": 2.2435324272770627e-05,
"loss": 1.6957,
"step": 25400
},
{
"epoch": 0.76,
"learning_rate": 2.2405519790176443e-05,
"loss": 1.6703,
"step": 25500
},
{
"epoch": 0.76,
"learning_rate": 2.2376013352408203e-05,
"loss": 1.7062,
"step": 25600
},
{
"epoch": 0.77,
"learning_rate": 2.2346208869814022e-05,
"loss": 1.6806,
"step": 25700
},
{
"epoch": 0.77,
"learning_rate": 2.2316404387219837e-05,
"loss": 1.6701,
"step": 25800
},
{
"epoch": 0.77,
"learning_rate": 2.2286599904625657e-05,
"loss": 1.6538,
"step": 25900
},
{
"epoch": 0.77,
"learning_rate": 2.2256795422031476e-05,
"loss": 1.6635,
"step": 26000
},
{
"epoch": 0.78,
"learning_rate": 2.222699093943729e-05,
"loss": 1.6629,
"step": 26100
},
{
"epoch": 0.78,
"learning_rate": 2.219718645684311e-05,
"loss": 1.7256,
"step": 26200
},
{
"epoch": 0.78,
"learning_rate": 2.216738197424893e-05,
"loss": 1.6586,
"step": 26300
},
{
"epoch": 0.79,
"learning_rate": 2.2137577491654748e-05,
"loss": 1.6814,
"step": 26400
},
{
"epoch": 0.79,
"learning_rate": 2.2107773009060564e-05,
"loss": 1.6326,
"step": 26500
},
{
"epoch": 0.79,
"learning_rate": 2.207796852646638e-05,
"loss": 1.6973,
"step": 26600
},
{
"epoch": 0.8,
"learning_rate": 2.20481640438722e-05,
"loss": 1.6486,
"step": 26700
},
{
"epoch": 0.8,
"learning_rate": 2.2018359561278014e-05,
"loss": 1.6419,
"step": 26800
},
{
"epoch": 0.8,
"learning_rate": 2.1988555078683833e-05,
"loss": 1.665,
"step": 26900
},
{
"epoch": 0.8,
"learning_rate": 2.1958750596089652e-05,
"loss": 1.6391,
"step": 27000
},
{
"epoch": 0.81,
"learning_rate": 2.192894611349547e-05,
"loss": 1.6599,
"step": 27100
},
{
"epoch": 0.81,
"learning_rate": 2.1899141630901287e-05,
"loss": 1.6657,
"step": 27200
},
{
"epoch": 0.81,
"learning_rate": 2.1869337148307106e-05,
"loss": 1.651,
"step": 27300
},
{
"epoch": 0.82,
"learning_rate": 2.1839532665712925e-05,
"loss": 1.6894,
"step": 27400
},
{
"epoch": 0.82,
"learning_rate": 2.180972818311874e-05,
"loss": 1.6259,
"step": 27500
},
{
"epoch": 0.82,
"learning_rate": 2.177992370052456e-05,
"loss": 1.6694,
"step": 27600
},
{
"epoch": 0.83,
"learning_rate": 2.175011921793038e-05,
"loss": 1.7037,
"step": 27700
},
{
"epoch": 0.83,
"learning_rate": 2.1720314735336198e-05,
"loss": 1.6759,
"step": 27800
},
{
"epoch": 0.83,
"learning_rate": 2.1690808297567954e-05,
"loss": 1.6561,
"step": 27900
},
{
"epoch": 0.83,
"learning_rate": 2.1661003814973773e-05,
"loss": 1.6251,
"step": 28000
},
{
"epoch": 0.84,
"learning_rate": 2.1631199332379592e-05,
"loss": 1.6564,
"step": 28100
},
{
"epoch": 0.84,
"learning_rate": 2.1601394849785408e-05,
"loss": 1.5827,
"step": 28200
},
{
"epoch": 0.84,
"learning_rate": 2.1571590367191227e-05,
"loss": 1.6458,
"step": 28300
},
{
"epoch": 0.85,
"learning_rate": 2.1541785884597046e-05,
"loss": 1.6513,
"step": 28400
},
{
"epoch": 0.85,
"learning_rate": 2.151198140200286e-05,
"loss": 1.6328,
"step": 28500
},
{
"epoch": 0.85,
"learning_rate": 2.148217691940868e-05,
"loss": 1.6608,
"step": 28600
},
{
"epoch": 0.86,
"learning_rate": 2.14523724368145e-05,
"loss": 1.6601,
"step": 28700
},
{
"epoch": 0.86,
"learning_rate": 2.142256795422032e-05,
"loss": 1.6835,
"step": 28800
},
{
"epoch": 0.86,
"learning_rate": 2.139276347162613e-05,
"loss": 1.644,
"step": 28900
},
{
"epoch": 0.86,
"learning_rate": 2.136295898903195e-05,
"loss": 1.647,
"step": 29000
},
{
"epoch": 0.87,
"learning_rate": 2.133315450643777e-05,
"loss": 1.6461,
"step": 29100
},
{
"epoch": 0.87,
"learning_rate": 2.1303350023843584e-05,
"loss": 1.6605,
"step": 29200
},
{
"epoch": 0.87,
"learning_rate": 2.1273545541249403e-05,
"loss": 1.693,
"step": 29300
},
{
"epoch": 0.88,
"learning_rate": 2.1243741058655222e-05,
"loss": 1.6356,
"step": 29400
},
{
"epoch": 0.88,
"learning_rate": 2.121393657606104e-05,
"loss": 1.6449,
"step": 29500
},
{
"epoch": 0.88,
"learning_rate": 2.1184132093466857e-05,
"loss": 1.628,
"step": 29600
},
{
"epoch": 0.89,
"learning_rate": 2.1154327610872676e-05,
"loss": 1.6563,
"step": 29700
},
{
"epoch": 0.89,
"learning_rate": 2.1124523128278495e-05,
"loss": 1.6084,
"step": 29800
},
{
"epoch": 0.89,
"learning_rate": 2.109471864568431e-05,
"loss": 1.627,
"step": 29900
},
{
"epoch": 0.89,
"learning_rate": 2.106491416309013e-05,
"loss": 1.6689,
"step": 30000
},
{
"epoch": 0.9,
"learning_rate": 2.103510968049595e-05,
"loss": 1.6247,
"step": 30100
},
{
"epoch": 0.9,
"learning_rate": 2.1005305197901768e-05,
"loss": 1.6514,
"step": 30200
},
{
"epoch": 0.9,
"learning_rate": 2.0975500715307583e-05,
"loss": 1.6428,
"step": 30300
},
{
"epoch": 0.91,
"learning_rate": 2.0945994277539343e-05,
"loss": 1.6138,
"step": 30400
},
{
"epoch": 0.91,
"learning_rate": 2.0916189794945162e-05,
"loss": 1.6405,
"step": 30500
},
{
"epoch": 0.91,
"learning_rate": 2.0886385312350978e-05,
"loss": 1.6526,
"step": 30600
},
{
"epoch": 0.91,
"learning_rate": 2.0856580829756797e-05,
"loss": 1.6827,
"step": 30700
},
{
"epoch": 0.92,
"learning_rate": 2.0826776347162616e-05,
"loss": 1.664,
"step": 30800
},
{
"epoch": 0.92,
"learning_rate": 2.079697186456843e-05,
"loss": 1.635,
"step": 30900
},
{
"epoch": 0.92,
"learning_rate": 2.076716738197425e-05,
"loss": 1.6359,
"step": 31000
},
{
"epoch": 0.93,
"learning_rate": 2.0737362899380066e-05,
"loss": 1.6109,
"step": 31100
},
{
"epoch": 0.93,
"learning_rate": 2.0707558416785885e-05,
"loss": 1.5975,
"step": 31200
},
{
"epoch": 0.93,
"learning_rate": 2.06777539341917e-05,
"loss": 1.6295,
"step": 31300
},
{
"epoch": 0.94,
"learning_rate": 2.064794945159752e-05,
"loss": 1.659,
"step": 31400
},
{
"epoch": 0.94,
"learning_rate": 2.061814496900334e-05,
"loss": 1.6064,
"step": 31500
},
{
"epoch": 0.94,
"learning_rate": 2.0588340486409155e-05,
"loss": 1.646,
"step": 31600
},
{
"epoch": 0.94,
"learning_rate": 2.0558536003814974e-05,
"loss": 1.6404,
"step": 31700
},
{
"epoch": 0.95,
"learning_rate": 2.0528731521220793e-05,
"loss": 1.6281,
"step": 31800
},
{
"epoch": 0.95,
"learning_rate": 2.049892703862661e-05,
"loss": 1.6216,
"step": 31900
},
{
"epoch": 0.95,
"learning_rate": 2.0469122556032427e-05,
"loss": 1.6398,
"step": 32000
},
{
"epoch": 0.96,
"learning_rate": 2.0439318073438246e-05,
"loss": 1.67,
"step": 32100
},
{
"epoch": 0.96,
"learning_rate": 2.0409513590844065e-05,
"loss": 1.619,
"step": 32200
},
{
"epoch": 0.96,
"learning_rate": 2.037970910824988e-05,
"loss": 1.6067,
"step": 32300
},
{
"epoch": 0.97,
"learning_rate": 2.03499046256557e-05,
"loss": 1.6079,
"step": 32400
},
{
"epoch": 0.97,
"learning_rate": 2.032010014306152e-05,
"loss": 1.6647,
"step": 32500
},
{
"epoch": 0.97,
"learning_rate": 2.0290295660467335e-05,
"loss": 1.6308,
"step": 32600
},
{
"epoch": 0.97,
"learning_rate": 2.026049117787315e-05,
"loss": 1.6525,
"step": 32700
},
{
"epoch": 0.98,
"learning_rate": 2.023068669527897e-05,
"loss": 1.6212,
"step": 32800
},
{
"epoch": 0.98,
"learning_rate": 2.020118025751073e-05,
"loss": 1.6845,
"step": 32900
},
{
"epoch": 0.98,
"learning_rate": 2.0171375774916548e-05,
"loss": 1.6324,
"step": 33000
},
{
"epoch": 0.99,
"learning_rate": 2.0141571292322367e-05,
"loss": 1.6024,
"step": 33100
},
{
"epoch": 0.99,
"learning_rate": 2.0111766809728186e-05,
"loss": 1.6687,
"step": 33200
},
{
"epoch": 0.99,
"learning_rate": 2.0081962327134002e-05,
"loss": 1.6338,
"step": 33300
},
{
"epoch": 1.0,
"learning_rate": 2.0052157844539818e-05,
"loss": 1.6172,
"step": 33400
},
{
"epoch": 1.0,
"learning_rate": 2.0022353361945637e-05,
"loss": 1.6706,
"step": 33500
},
{
"epoch": 1.0,
"eval_gen_len": 18.6217,
"eval_loss": 1.5689729452133179,
"eval_rouge1": 31.2477,
"eval_rouge2": 16.5455,
"eval_rougeL": 26.9855,
"eval_rougeLsum": 26.9754,
"eval_runtime": 291.6493,
"eval_samples_per_second": 9.299,
"eval_steps_per_second": 1.162,
"step": 33552
},
{
"epoch": 1.0,
"learning_rate": 1.9992548879351456e-05,
"loss": 1.5373,
"step": 33600
},
{
"epoch": 1.0,
"learning_rate": 1.996274439675727e-05,
"loss": 1.5053,
"step": 33700
},
{
"epoch": 1.01,
"learning_rate": 1.993293991416309e-05,
"loss": 1.4423,
"step": 33800
},
{
"epoch": 1.01,
"learning_rate": 1.990313543156891e-05,
"loss": 1.4924,
"step": 33900
},
{
"epoch": 1.01,
"learning_rate": 1.9873330948974725e-05,
"loss": 1.5381,
"step": 34000
},
{
"epoch": 1.02,
"learning_rate": 1.9843526466380544e-05,
"loss": 1.4693,
"step": 34100
},
{
"epoch": 1.02,
"learning_rate": 1.9813721983786363e-05,
"loss": 1.4748,
"step": 34200
},
{
"epoch": 1.02,
"learning_rate": 1.9783917501192182e-05,
"loss": 1.5042,
"step": 34300
},
{
"epoch": 1.03,
"learning_rate": 1.9754113018597998e-05,
"loss": 1.4755,
"step": 34400
},
{
"epoch": 1.03,
"learning_rate": 1.9724308536003817e-05,
"loss": 1.4685,
"step": 34500
},
{
"epoch": 1.03,
"learning_rate": 1.9694504053409636e-05,
"loss": 1.5157,
"step": 34600
},
{
"epoch": 1.03,
"learning_rate": 1.966469957081545e-05,
"loss": 1.4362,
"step": 34700
},
{
"epoch": 1.04,
"learning_rate": 1.963489508822127e-05,
"loss": 1.4859,
"step": 34800
},
{
"epoch": 1.04,
"learning_rate": 1.9605090605627086e-05,
"loss": 1.4786,
"step": 34900
},
{
"epoch": 1.04,
"learning_rate": 1.9575286123032905e-05,
"loss": 1.4813,
"step": 35000
},
{
"epoch": 1.05,
"learning_rate": 1.954548164043872e-05,
"loss": 1.4775,
"step": 35100
},
{
"epoch": 1.05,
"learning_rate": 1.951567715784454e-05,
"loss": 1.5173,
"step": 35200
},
{
"epoch": 1.05,
"learning_rate": 1.948587267525036e-05,
"loss": 1.5366,
"step": 35300
},
{
"epoch": 1.06,
"learning_rate": 1.9456068192656174e-05,
"loss": 1.4664,
"step": 35400
},
{
"epoch": 1.06,
"learning_rate": 1.9426263710061993e-05,
"loss": 1.5118,
"step": 35500
},
{
"epoch": 1.06,
"learning_rate": 1.9396459227467812e-05,
"loss": 1.4968,
"step": 35600
},
{
"epoch": 1.06,
"learning_rate": 1.936665474487363e-05,
"loss": 1.4721,
"step": 35700
},
{
"epoch": 1.07,
"learning_rate": 1.9336850262279447e-05,
"loss": 1.5235,
"step": 35800
},
{
"epoch": 1.07,
"learning_rate": 1.9307045779685266e-05,
"loss": 1.4908,
"step": 35900
},
{
"epoch": 1.07,
"learning_rate": 1.9277241297091085e-05,
"loss": 1.4354,
"step": 36000
},
{
"epoch": 1.08,
"learning_rate": 1.92474368144969e-05,
"loss": 1.517,
"step": 36100
},
{
"epoch": 1.08,
"learning_rate": 1.921763233190272e-05,
"loss": 1.4848,
"step": 36200
},
{
"epoch": 1.08,
"learning_rate": 1.9187827849308535e-05,
"loss": 1.5129,
"step": 36300
},
{
"epoch": 1.08,
"learning_rate": 1.9158023366714354e-05,
"loss": 1.4799,
"step": 36400
},
{
"epoch": 1.09,
"learning_rate": 1.912821888412017e-05,
"loss": 1.4383,
"step": 36500
},
{
"epoch": 1.09,
"learning_rate": 1.909841440152599e-05,
"loss": 1.4719,
"step": 36600
},
{
"epoch": 1.09,
"learning_rate": 1.9068609918931808e-05,
"loss": 1.4657,
"step": 36700
},
{
"epoch": 1.1,
"learning_rate": 1.9038805436337623e-05,
"loss": 1.5345,
"step": 36800
},
{
"epoch": 1.1,
"learning_rate": 1.9009000953743443e-05,
"loss": 1.4622,
"step": 36900
},
{
"epoch": 1.1,
"learning_rate": 1.897919647114926e-05,
"loss": 1.4856,
"step": 37000
},
{
"epoch": 1.11,
"learning_rate": 1.894939198855508e-05,
"loss": 1.4691,
"step": 37100
},
{
"epoch": 1.11,
"learning_rate": 1.8919587505960896e-05,
"loss": 1.4761,
"step": 37200
},
{
"epoch": 1.11,
"learning_rate": 1.8889783023366715e-05,
"loss": 1.5007,
"step": 37300
},
{
"epoch": 1.11,
"learning_rate": 1.8859978540772534e-05,
"loss": 1.4755,
"step": 37400
},
{
"epoch": 1.12,
"learning_rate": 1.8830174058178353e-05,
"loss": 1.5022,
"step": 37500
},
{
"epoch": 1.12,
"learning_rate": 1.880036957558417e-05,
"loss": 1.4647,
"step": 37600
},
{
"epoch": 1.12,
"learning_rate": 1.8770565092989988e-05,
"loss": 1.4652,
"step": 37700
},
{
"epoch": 1.13,
"learning_rate": 1.8740760610395804e-05,
"loss": 1.4751,
"step": 37800
},
{
"epoch": 1.13,
"learning_rate": 1.871095612780162e-05,
"loss": 1.4767,
"step": 37900
},
{
"epoch": 1.13,
"learning_rate": 1.8681151645207438e-05,
"loss": 1.5161,
"step": 38000
},
{
"epoch": 1.14,
"learning_rate": 1.8651347162613257e-05,
"loss": 1.4369,
"step": 38100
},
{
"epoch": 1.14,
"learning_rate": 1.8621542680019076e-05,
"loss": 1.4612,
"step": 38200
},
{
"epoch": 1.14,
"learning_rate": 1.8591738197424892e-05,
"loss": 1.4817,
"step": 38300
},
{
"epoch": 1.14,
"learning_rate": 1.856193371483071e-05,
"loss": 1.4727,
"step": 38400
},
{
"epoch": 1.15,
"learning_rate": 1.853212923223653e-05,
"loss": 1.4477,
"step": 38500
},
{
"epoch": 1.15,
"learning_rate": 1.8502324749642345e-05,
"loss": 1.4775,
"step": 38600
},
{
"epoch": 1.15,
"learning_rate": 1.8472520267048165e-05,
"loss": 1.484,
"step": 38700
},
{
"epoch": 1.16,
"learning_rate": 1.8442715784453984e-05,
"loss": 1.4885,
"step": 38800
},
{
"epoch": 1.16,
"learning_rate": 1.8412911301859803e-05,
"loss": 1.5263,
"step": 38900
},
{
"epoch": 1.16,
"learning_rate": 1.8383106819265618e-05,
"loss": 1.4864,
"step": 39000
},
{
"epoch": 1.17,
"learning_rate": 1.8353302336671437e-05,
"loss": 1.5276,
"step": 39100
},
{
"epoch": 1.17,
"learning_rate": 1.8323497854077256e-05,
"loss": 1.5265,
"step": 39200
},
{
"epoch": 1.17,
"learning_rate": 1.829369337148307e-05,
"loss": 1.4731,
"step": 39300
},
{
"epoch": 1.17,
"learning_rate": 1.8263888888888887e-05,
"loss": 1.4715,
"step": 39400
},
{
"epoch": 1.18,
"learning_rate": 1.8234084406294706e-05,
"loss": 1.5026,
"step": 39500
},
{
"epoch": 1.18,
"learning_rate": 1.8204279923700526e-05,
"loss": 1.4841,
"step": 39600
},
{
"epoch": 1.18,
"learning_rate": 1.817447544110634e-05,
"loss": 1.4688,
"step": 39700
},
{
"epoch": 1.19,
"learning_rate": 1.814467095851216e-05,
"loss": 1.4782,
"step": 39800
},
{
"epoch": 1.19,
"learning_rate": 1.811486647591798e-05,
"loss": 1.4734,
"step": 39900
},
{
"epoch": 1.19,
"learning_rate": 1.8085061993323795e-05,
"loss": 1.4915,
"step": 40000
},
{
"epoch": 1.2,
"learning_rate": 1.8055257510729614e-05,
"loss": 1.4493,
"step": 40100
},
{
"epoch": 1.2,
"learning_rate": 1.8025453028135433e-05,
"loss": 1.5045,
"step": 40200
},
{
"epoch": 1.2,
"learning_rate": 1.7995648545541252e-05,
"loss": 1.4681,
"step": 40300
},
{
"epoch": 1.2,
"learning_rate": 1.7965844062947067e-05,
"loss": 1.5133,
"step": 40400
},
{
"epoch": 1.21,
"learning_rate": 1.7936039580352887e-05,
"loss": 1.4748,
"step": 40500
},
{
"epoch": 1.21,
"learning_rate": 1.7906235097758706e-05,
"loss": 1.5346,
"step": 40600
},
{
"epoch": 1.21,
"learning_rate": 1.787643061516452e-05,
"loss": 1.4709,
"step": 40700
},
{
"epoch": 1.22,
"learning_rate": 1.7846626132570337e-05,
"loss": 1.4431,
"step": 40800
},
{
"epoch": 1.22,
"learning_rate": 1.7816821649976156e-05,
"loss": 1.5095,
"step": 40900
},
{
"epoch": 1.22,
"learning_rate": 1.7787017167381975e-05,
"loss": 1.4872,
"step": 41000
},
{
"epoch": 1.22,
"learning_rate": 1.775721268478779e-05,
"loss": 1.529,
"step": 41100
},
{
"epoch": 1.23,
"learning_rate": 1.772740820219361e-05,
"loss": 1.4687,
"step": 41200
},
{
"epoch": 1.23,
"learning_rate": 1.769760371959943e-05,
"loss": 1.4696,
"step": 41300
},
{
"epoch": 1.23,
"learning_rate": 1.7667799237005248e-05,
"loss": 1.457,
"step": 41400
},
{
"epoch": 1.24,
"learning_rate": 1.7637994754411063e-05,
"loss": 1.5051,
"step": 41500
},
{
"epoch": 1.24,
"learning_rate": 1.7608190271816882e-05,
"loss": 1.4694,
"step": 41600
},
{
"epoch": 1.24,
"learning_rate": 1.75783857892227e-05,
"loss": 1.4556,
"step": 41700
},
{
"epoch": 1.25,
"learning_rate": 1.7548581306628517e-05,
"loss": 1.4872,
"step": 41800
},
{
"epoch": 1.25,
"learning_rate": 1.7518776824034336e-05,
"loss": 1.4489,
"step": 41900
},
{
"epoch": 1.25,
"learning_rate": 1.7488972341440155e-05,
"loss": 1.4892,
"step": 42000
},
{
"epoch": 1.25,
"learning_rate": 1.7459167858845974e-05,
"loss": 1.4869,
"step": 42100
},
{
"epoch": 1.26,
"learning_rate": 1.742936337625179e-05,
"loss": 1.4624,
"step": 42200
},
{
"epoch": 1.26,
"learning_rate": 1.7399558893657605e-05,
"loss": 1.4492,
"step": 42300
},
{
"epoch": 1.26,
"learning_rate": 1.7369754411063424e-05,
"loss": 1.4306,
"step": 42400
},
{
"epoch": 1.27,
"learning_rate": 1.733994992846924e-05,
"loss": 1.4595,
"step": 42500
},
{
"epoch": 1.27,
"learning_rate": 1.731014544587506e-05,
"loss": 1.4703,
"step": 42600
},
{
"epoch": 1.27,
"learning_rate": 1.7280340963280878e-05,
"loss": 1.4962,
"step": 42700
},
{
"epoch": 1.28,
"learning_rate": 1.7250536480686697e-05,
"loss": 1.4476,
"step": 42800
},
{
"epoch": 1.28,
"learning_rate": 1.7220731998092512e-05,
"loss": 1.4674,
"step": 42900
},
{
"epoch": 1.28,
"learning_rate": 1.719092751549833e-05,
"loss": 1.4704,
"step": 43000
},
{
"epoch": 1.28,
"learning_rate": 1.716112303290415e-05,
"loss": 1.5435,
"step": 43100
},
{
"epoch": 1.29,
"learning_rate": 1.7131318550309966e-05,
"loss": 1.5339,
"step": 43200
},
{
"epoch": 1.29,
"learning_rate": 1.7101514067715785e-05,
"loss": 1.4905,
"step": 43300
},
{
"epoch": 1.29,
"learning_rate": 1.7071709585121604e-05,
"loss": 1.4978,
"step": 43400
},
{
"epoch": 1.3,
"learning_rate": 1.7041905102527423e-05,
"loss": 1.4478,
"step": 43500
},
{
"epoch": 1.3,
"learning_rate": 1.701239866475918e-05,
"loss": 1.5059,
"step": 43600
},
{
"epoch": 1.3,
"learning_rate": 1.6982594182165e-05,
"loss": 1.4643,
"step": 43700
},
{
"epoch": 1.31,
"learning_rate": 1.6953087744396755e-05,
"loss": 1.5011,
"step": 43800
},
{
"epoch": 1.31,
"learning_rate": 1.6923283261802574e-05,
"loss": 1.4895,
"step": 43900
},
{
"epoch": 1.31,
"learning_rate": 1.6893478779208393e-05,
"loss": 1.4306,
"step": 44000
},
{
"epoch": 1.31,
"learning_rate": 1.686367429661421e-05,
"loss": 1.5001,
"step": 44100
},
{
"epoch": 1.32,
"learning_rate": 1.6834167858845972e-05,
"loss": 1.4506,
"step": 44200
},
{
"epoch": 1.32,
"learning_rate": 1.680436337625179e-05,
"loss": 1.4456,
"step": 44300
},
{
"epoch": 1.32,
"learning_rate": 1.6774558893657607e-05,
"loss": 1.5038,
"step": 44400
},
{
"epoch": 1.33,
"learning_rate": 1.6744754411063426e-05,
"loss": 1.5082,
"step": 44500
},
{
"epoch": 1.33,
"learning_rate": 1.671494992846924e-05,
"loss": 1.5055,
"step": 44600
},
{
"epoch": 1.33,
"learning_rate": 1.6685145445875057e-05,
"loss": 1.4697,
"step": 44700
},
{
"epoch": 1.34,
"learning_rate": 1.6655340963280876e-05,
"loss": 1.501,
"step": 44800
},
{
"epoch": 1.34,
"learning_rate": 1.6625536480686695e-05,
"loss": 1.4994,
"step": 44900
},
{
"epoch": 1.34,
"learning_rate": 1.6595731998092514e-05,
"loss": 1.4899,
"step": 45000
},
{
"epoch": 1.34,
"learning_rate": 1.656592751549833e-05,
"loss": 1.4819,
"step": 45100
},
{
"epoch": 1.35,
"learning_rate": 1.653612303290415e-05,
"loss": 1.4529,
"step": 45200
},
{
"epoch": 1.35,
"learning_rate": 1.6506318550309968e-05,
"loss": 1.4849,
"step": 45300
},
{
"epoch": 1.35,
"learning_rate": 1.6476514067715784e-05,
"loss": 1.4587,
"step": 45400
},
{
"epoch": 1.36,
"learning_rate": 1.6446709585121603e-05,
"loss": 1.4595,
"step": 45500
},
{
"epoch": 1.36,
"learning_rate": 1.641690510252742e-05,
"loss": 1.4292,
"step": 45600
},
{
"epoch": 1.36,
"learning_rate": 1.638710061993324e-05,
"loss": 1.4624,
"step": 45700
},
{
"epoch": 1.37,
"learning_rate": 1.6357296137339056e-05,
"loss": 1.5235,
"step": 45800
},
{
"epoch": 1.37,
"learning_rate": 1.6327491654744875e-05,
"loss": 1.5378,
"step": 45900
},
{
"epoch": 1.37,
"learning_rate": 1.6297687172150694e-05,
"loss": 1.4633,
"step": 46000
},
{
"epoch": 1.37,
"learning_rate": 1.626788268955651e-05,
"loss": 1.4387,
"step": 46100
},
{
"epoch": 1.38,
"learning_rate": 1.6238078206962326e-05,
"loss": 1.4335,
"step": 46200
},
{
"epoch": 1.38,
"learning_rate": 1.6208273724368145e-05,
"loss": 1.4708,
"step": 46300
},
{
"epoch": 1.38,
"learning_rate": 1.6178469241773964e-05,
"loss": 1.4633,
"step": 46400
},
{
"epoch": 1.39,
"learning_rate": 1.6148962804005723e-05,
"loss": 1.5035,
"step": 46500
},
{
"epoch": 1.39,
"learning_rate": 1.6119158321411542e-05,
"loss": 1.4553,
"step": 46600
},
{
"epoch": 1.39,
"learning_rate": 1.608935383881736e-05,
"loss": 1.4737,
"step": 46700
},
{
"epoch": 1.39,
"learning_rate": 1.6059549356223177e-05,
"loss": 1.4676,
"step": 46800
},
{
"epoch": 1.4,
"learning_rate": 1.6029744873628993e-05,
"loss": 1.4482,
"step": 46900
},
{
"epoch": 1.4,
"learning_rate": 1.5999940391034812e-05,
"loss": 1.4381,
"step": 47000
},
{
"epoch": 1.4,
"learning_rate": 1.5970135908440627e-05,
"loss": 1.4099,
"step": 47100
},
{
"epoch": 1.41,
"learning_rate": 1.5940331425846446e-05,
"loss": 1.4595,
"step": 47200
},
{
"epoch": 1.41,
"learning_rate": 1.5910526943252265e-05,
"loss": 1.4551,
"step": 47300
},
{
"epoch": 1.41,
"learning_rate": 1.5880722460658084e-05,
"loss": 1.4629,
"step": 47400
},
{
"epoch": 1.42,
"learning_rate": 1.58509179780639e-05,
"loss": 1.4616,
"step": 47500
},
{
"epoch": 1.42,
"learning_rate": 1.582111349546972e-05,
"loss": 1.4742,
"step": 47600
},
{
"epoch": 1.42,
"learning_rate": 1.5791309012875538e-05,
"loss": 1.4735,
"step": 47700
},
{
"epoch": 1.42,
"learning_rate": 1.5761504530281354e-05,
"loss": 1.4289,
"step": 47800
},
{
"epoch": 1.43,
"learning_rate": 1.5731700047687173e-05,
"loss": 1.4924,
"step": 47900
},
{
"epoch": 1.43,
"learning_rate": 1.5701895565092992e-05,
"loss": 1.4643,
"step": 48000
},
{
"epoch": 1.43,
"learning_rate": 1.567209108249881e-05,
"loss": 1.4499,
"step": 48100
},
{
"epoch": 1.44,
"learning_rate": 1.5642286599904626e-05,
"loss": 1.5153,
"step": 48200
},
{
"epoch": 1.44,
"learning_rate": 1.5612482117310442e-05,
"loss": 1.4627,
"step": 48300
},
{
"epoch": 1.44,
"learning_rate": 1.558267763471626e-05,
"loss": 1.426,
"step": 48400
},
{
"epoch": 1.45,
"learning_rate": 1.555317119694802e-05,
"loss": 1.4213,
"step": 48500
},
{
"epoch": 1.45,
"learning_rate": 1.552336671435384e-05,
"loss": 1.4519,
"step": 48600
},
{
"epoch": 1.45,
"learning_rate": 1.549356223175966e-05,
"loss": 1.4877,
"step": 48700
},
{
"epoch": 1.45,
"learning_rate": 1.5463757749165475e-05,
"loss": 1.488,
"step": 48800
},
{
"epoch": 1.46,
"learning_rate": 1.5433953266571294e-05,
"loss": 1.4738,
"step": 48900
},
{
"epoch": 1.46,
"learning_rate": 1.5404148783977113e-05,
"loss": 1.4912,
"step": 49000
},
{
"epoch": 1.46,
"learning_rate": 1.537434430138293e-05,
"loss": 1.4391,
"step": 49100
},
{
"epoch": 1.47,
"learning_rate": 1.5344539818788744e-05,
"loss": 1.4983,
"step": 49200
},
{
"epoch": 1.47,
"learning_rate": 1.5314735336194563e-05,
"loss": 1.411,
"step": 49300
},
{
"epoch": 1.47,
"learning_rate": 1.5284930853600382e-05,
"loss": 1.5083,
"step": 49400
},
{
"epoch": 1.48,
"learning_rate": 1.5255126371006198e-05,
"loss": 1.4399,
"step": 49500
},
{
"epoch": 1.48,
"learning_rate": 1.5225321888412017e-05,
"loss": 1.4691,
"step": 49600
},
{
"epoch": 1.48,
"learning_rate": 1.5195517405817836e-05,
"loss": 1.4732,
"step": 49700
},
{
"epoch": 1.48,
"learning_rate": 1.5165712923223655e-05,
"loss": 1.5002,
"step": 49800
},
{
"epoch": 1.49,
"learning_rate": 1.513590844062947e-05,
"loss": 1.4984,
"step": 49900
},
{
"epoch": 1.49,
"learning_rate": 1.510610395803529e-05,
"loss": 1.4538,
"step": 50000
},
{
"epoch": 1.49,
"learning_rate": 1.5076299475441108e-05,
"loss": 1.4828,
"step": 50100
},
{
"epoch": 1.5,
"learning_rate": 1.5046494992846922e-05,
"loss": 1.5021,
"step": 50200
},
{
"epoch": 1.5,
"learning_rate": 1.5016690510252741e-05,
"loss": 1.4735,
"step": 50300
},
{
"epoch": 1.5,
"learning_rate": 1.498688602765856e-05,
"loss": 1.4508,
"step": 50400
},
{
"epoch": 1.51,
"learning_rate": 1.4957081545064378e-05,
"loss": 1.5091,
"step": 50500
},
{
"epoch": 1.51,
"learning_rate": 1.4927277062470197e-05,
"loss": 1.4725,
"step": 50600
},
{
"epoch": 1.51,
"learning_rate": 1.4897472579876014e-05,
"loss": 1.4495,
"step": 50700
},
{
"epoch": 1.51,
"learning_rate": 1.4867668097281831e-05,
"loss": 1.4596,
"step": 50800
},
{
"epoch": 1.52,
"learning_rate": 1.483786361468765e-05,
"loss": 1.4447,
"step": 50900
},
{
"epoch": 1.52,
"learning_rate": 1.4808059132093466e-05,
"loss": 1.4665,
"step": 51000
},
{
"epoch": 1.52,
"learning_rate": 1.4778254649499285e-05,
"loss": 1.4793,
"step": 51100
},
{
"epoch": 1.53,
"learning_rate": 1.4748450166905102e-05,
"loss": 1.4722,
"step": 51200
},
{
"epoch": 1.53,
"learning_rate": 1.4718645684310921e-05,
"loss": 1.4474,
"step": 51300
},
{
"epoch": 1.53,
"learning_rate": 1.4688841201716739e-05,
"loss": 1.4295,
"step": 51400
},
{
"epoch": 1.53,
"learning_rate": 1.4659036719122558e-05,
"loss": 1.4441,
"step": 51500
},
{
"epoch": 1.54,
"learning_rate": 1.4629232236528375e-05,
"loss": 1.4148,
"step": 51600
},
{
"epoch": 1.54,
"learning_rate": 1.459942775393419e-05,
"loss": 1.4457,
"step": 51700
},
{
"epoch": 1.54,
"learning_rate": 1.456962327134001e-05,
"loss": 1.4851,
"step": 51800
},
{
"epoch": 1.55,
"learning_rate": 1.4539818788745827e-05,
"loss": 1.4864,
"step": 51900
},
{
"epoch": 1.55,
"learning_rate": 1.4510014306151646e-05,
"loss": 1.4926,
"step": 52000
},
{
"epoch": 1.55,
"learning_rate": 1.4480209823557463e-05,
"loss": 1.4553,
"step": 52100
},
{
"epoch": 1.56,
"learning_rate": 1.4450405340963282e-05,
"loss": 1.4744,
"step": 52200
},
{
"epoch": 1.56,
"learning_rate": 1.44206008583691e-05,
"loss": 1.4624,
"step": 52300
},
{
"epoch": 1.56,
"learning_rate": 1.4390796375774915e-05,
"loss": 1.4483,
"step": 52400
},
{
"epoch": 1.56,
"learning_rate": 1.4360991893180734e-05,
"loss": 1.4569,
"step": 52500
},
{
"epoch": 1.57,
"learning_rate": 1.4331187410586552e-05,
"loss": 1.4262,
"step": 52600
},
{
"epoch": 1.57,
"learning_rate": 1.430138292799237e-05,
"loss": 1.4517,
"step": 52700
},
{
"epoch": 1.57,
"learning_rate": 1.4271578445398188e-05,
"loss": 1.4496,
"step": 52800
},
{
"epoch": 1.58,
"learning_rate": 1.4241773962804007e-05,
"loss": 1.4519,
"step": 52900
},
{
"epoch": 1.58,
"learning_rate": 1.4211969480209824e-05,
"loss": 1.4407,
"step": 53000
},
{
"epoch": 1.58,
"learning_rate": 1.4182164997615643e-05,
"loss": 1.4715,
"step": 53100
},
{
"epoch": 1.59,
"learning_rate": 1.4152360515021459e-05,
"loss": 1.4313,
"step": 53200
},
{
"epoch": 1.59,
"learning_rate": 1.4122556032427276e-05,
"loss": 1.4798,
"step": 53300
},
{
"epoch": 1.59,
"learning_rate": 1.4092751549833095e-05,
"loss": 1.4386,
"step": 53400
},
{
"epoch": 1.59,
"learning_rate": 1.4062947067238913e-05,
"loss": 1.4592,
"step": 53500
},
{
"epoch": 1.6,
"learning_rate": 1.4033142584644732e-05,
"loss": 1.4857,
"step": 53600
},
{
"epoch": 1.6,
"learning_rate": 1.4003338102050549e-05,
"loss": 1.463,
"step": 53700
},
{
"epoch": 1.6,
"learning_rate": 1.3973533619456368e-05,
"loss": 1.4752,
"step": 53800
},
{
"epoch": 1.61,
"learning_rate": 1.3943729136862184e-05,
"loss": 1.4524,
"step": 53900
},
{
"epoch": 1.61,
"learning_rate": 1.3913924654268001e-05,
"loss": 1.4677,
"step": 54000
},
{
"epoch": 1.61,
"learning_rate": 1.388412017167382e-05,
"loss": 1.48,
"step": 54100
},
{
"epoch": 1.62,
"learning_rate": 1.3854315689079637e-05,
"loss": 1.4365,
"step": 54200
},
{
"epoch": 1.62,
"learning_rate": 1.3824511206485456e-05,
"loss": 1.485,
"step": 54300
},
{
"epoch": 1.62,
"learning_rate": 1.3794706723891274e-05,
"loss": 1.4921,
"step": 54400
},
{
"epoch": 1.62,
"learning_rate": 1.3764902241297093e-05,
"loss": 1.4118,
"step": 54500
},
{
"epoch": 1.63,
"learning_rate": 1.373509775870291e-05,
"loss": 1.4373,
"step": 54600
},
{
"epoch": 1.63,
"learning_rate": 1.3705293276108726e-05,
"loss": 1.4557,
"step": 54700
},
{
"epoch": 1.63,
"learning_rate": 1.3675488793514545e-05,
"loss": 1.4653,
"step": 54800
},
{
"epoch": 1.64,
"learning_rate": 1.3645684310920362e-05,
"loss": 1.4442,
"step": 54900
},
{
"epoch": 1.64,
"learning_rate": 1.3615879828326181e-05,
"loss": 1.4891,
"step": 55000
},
{
"epoch": 1.64,
"learning_rate": 1.3586075345731998e-05,
"loss": 1.4398,
"step": 55100
},
{
"epoch": 1.65,
"learning_rate": 1.3556270863137817e-05,
"loss": 1.4589,
"step": 55200
},
{
"epoch": 1.65,
"learning_rate": 1.3526466380543635e-05,
"loss": 1.4547,
"step": 55300
},
{
"epoch": 1.65,
"learning_rate": 1.3496661897949452e-05,
"loss": 1.4411,
"step": 55400
},
{
"epoch": 1.65,
"learning_rate": 1.346685741535527e-05,
"loss": 1.4369,
"step": 55500
},
{
"epoch": 1.66,
"learning_rate": 1.3437052932761087e-05,
"loss": 1.4799,
"step": 55600
},
{
"epoch": 1.66,
"learning_rate": 1.3407546494992847e-05,
"loss": 1.4088,
"step": 55700
},
{
"epoch": 1.66,
"learning_rate": 1.3377742012398666e-05,
"loss": 1.5022,
"step": 55800
},
{
"epoch": 1.67,
"learning_rate": 1.3347937529804483e-05,
"loss": 1.454,
"step": 55900
},
{
"epoch": 1.67,
"learning_rate": 1.3318133047210302e-05,
"loss": 1.4688,
"step": 56000
},
{
"epoch": 1.67,
"learning_rate": 1.328832856461612e-05,
"loss": 1.4207,
"step": 56100
},
{
"epoch": 1.68,
"learning_rate": 1.3258524082021937e-05,
"loss": 1.4623,
"step": 56200
},
{
"epoch": 1.68,
"learning_rate": 1.3228719599427754e-05,
"loss": 1.4111,
"step": 56300
},
{
"epoch": 1.68,
"learning_rate": 1.3198915116833571e-05,
"loss": 1.4048,
"step": 56400
},
{
"epoch": 1.68,
"learning_rate": 1.316911063423939e-05,
"loss": 1.5198,
"step": 56500
},
{
"epoch": 1.69,
"learning_rate": 1.3139306151645208e-05,
"loss": 1.4427,
"step": 56600
},
{
"epoch": 1.69,
"learning_rate": 1.3109501669051027e-05,
"loss": 1.451,
"step": 56700
},
{
"epoch": 1.69,
"learning_rate": 1.3079697186456844e-05,
"loss": 1.4568,
"step": 56800
},
{
"epoch": 1.7,
"learning_rate": 1.3049892703862661e-05,
"loss": 1.4529,
"step": 56900
},
{
"epoch": 1.7,
"learning_rate": 1.3020088221268479e-05,
"loss": 1.4692,
"step": 57000
},
{
"epoch": 1.7,
"learning_rate": 1.2990283738674296e-05,
"loss": 1.4401,
"step": 57100
},
{
"epoch": 1.7,
"learning_rate": 1.2960479256080115e-05,
"loss": 1.4887,
"step": 57200
},
{
"epoch": 1.71,
"learning_rate": 1.2930674773485932e-05,
"loss": 1.4467,
"step": 57300
},
{
"epoch": 1.71,
"learning_rate": 1.2900870290891751e-05,
"loss": 1.451,
"step": 57400
},
{
"epoch": 1.71,
"learning_rate": 1.2871065808297569e-05,
"loss": 1.4036,
"step": 57500
},
{
"epoch": 1.72,
"learning_rate": 1.2841261325703388e-05,
"loss": 1.4618,
"step": 57600
},
{
"epoch": 1.72,
"learning_rate": 1.2811456843109203e-05,
"loss": 1.4312,
"step": 57700
},
{
"epoch": 1.72,
"learning_rate": 1.2781950405340963e-05,
"loss": 1.4498,
"step": 57800
},
{
"epoch": 1.73,
"learning_rate": 1.275214592274678e-05,
"loss": 1.4157,
"step": 57900
},
{
"epoch": 1.73,
"learning_rate": 1.27223414401526e-05,
"loss": 1.5118,
"step": 58000
},
{
"epoch": 1.73,
"learning_rate": 1.2692536957558417e-05,
"loss": 1.3883,
"step": 58100
},
{
"epoch": 1.73,
"learning_rate": 1.2662732474964236e-05,
"loss": 1.4585,
"step": 58200
},
{
"epoch": 1.74,
"learning_rate": 1.2632927992370053e-05,
"loss": 1.4856,
"step": 58300
},
{
"epoch": 1.74,
"learning_rate": 1.260312350977587e-05,
"loss": 1.4275,
"step": 58400
},
{
"epoch": 1.74,
"learning_rate": 1.2573319027181688e-05,
"loss": 1.4411,
"step": 58500
},
{
"epoch": 1.75,
"learning_rate": 1.2543514544587505e-05,
"loss": 1.415,
"step": 58600
},
{
"epoch": 1.75,
"learning_rate": 1.2513710061993324e-05,
"loss": 1.4709,
"step": 58700
},
{
"epoch": 1.75,
"learning_rate": 1.2483905579399141e-05,
"loss": 1.4638,
"step": 58800
},
{
"epoch": 1.76,
"learning_rate": 1.245410109680496e-05,
"loss": 1.4267,
"step": 58900
},
{
"epoch": 1.76,
"learning_rate": 1.2424296614210778e-05,
"loss": 1.4549,
"step": 59000
},
{
"epoch": 1.76,
"learning_rate": 1.2394492131616597e-05,
"loss": 1.4498,
"step": 59100
},
{
"epoch": 1.76,
"learning_rate": 1.2364687649022412e-05,
"loss": 1.4167,
"step": 59200
},
{
"epoch": 1.77,
"learning_rate": 1.2334883166428231e-05,
"loss": 1.4539,
"step": 59300
},
{
"epoch": 1.77,
"learning_rate": 1.2305078683834049e-05,
"loss": 1.4552,
"step": 59400
},
{
"epoch": 1.77,
"learning_rate": 1.2275274201239866e-05,
"loss": 1.4319,
"step": 59500
},
{
"epoch": 1.78,
"learning_rate": 1.2245469718645685e-05,
"loss": 1.4416,
"step": 59600
},
{
"epoch": 1.78,
"learning_rate": 1.2215665236051502e-05,
"loss": 1.4767,
"step": 59700
},
{
"epoch": 1.78,
"learning_rate": 1.2185860753457321e-05,
"loss": 1.4194,
"step": 59800
},
{
"epoch": 1.79,
"learning_rate": 1.2156056270863137e-05,
"loss": 1.4524,
"step": 59900
},
{
"epoch": 1.79,
"learning_rate": 1.2126251788268956e-05,
"loss": 1.3962,
"step": 60000
},
{
"epoch": 1.79,
"learning_rate": 1.2096745350500716e-05,
"loss": 1.4556,
"step": 60100
},
{
"epoch": 1.79,
"learning_rate": 1.2067238912732474e-05,
"loss": 1.453,
"step": 60200
},
{
"epoch": 1.8,
"learning_rate": 1.2037434430138293e-05,
"loss": 1.4252,
"step": 60300
},
{
"epoch": 1.8,
"learning_rate": 1.2007927992370053e-05,
"loss": 1.4319,
"step": 60400
},
{
"epoch": 1.8,
"learning_rate": 1.197812350977587e-05,
"loss": 1.4448,
"step": 60500
},
{
"epoch": 1.81,
"learning_rate": 1.194831902718169e-05,
"loss": 1.4311,
"step": 60600
},
{
"epoch": 1.81,
"learning_rate": 1.1918514544587505e-05,
"loss": 1.4623,
"step": 60700
},
{
"epoch": 1.81,
"learning_rate": 1.1888710061993324e-05,
"loss": 1.408,
"step": 60800
},
{
"epoch": 1.82,
"learning_rate": 1.1858905579399142e-05,
"loss": 1.4497,
"step": 60900
},
{
"epoch": 1.82,
"learning_rate": 1.1829101096804959e-05,
"loss": 1.4413,
"step": 61000
},
{
"epoch": 1.82,
"learning_rate": 1.1799296614210778e-05,
"loss": 1.4201,
"step": 61100
},
{
"epoch": 1.82,
"learning_rate": 1.1769492131616595e-05,
"loss": 1.452,
"step": 61200
},
{
"epoch": 1.83,
"learning_rate": 1.1739687649022414e-05,
"loss": 1.4851,
"step": 61300
},
{
"epoch": 1.83,
"learning_rate": 1.1709883166428232e-05,
"loss": 1.4997,
"step": 61400
},
{
"epoch": 1.83,
"learning_rate": 1.1680078683834049e-05,
"loss": 1.461,
"step": 61500
},
{
"epoch": 1.84,
"learning_rate": 1.1650274201239866e-05,
"loss": 1.3893,
"step": 61600
},
{
"epoch": 1.84,
"learning_rate": 1.1620767763471626e-05,
"loss": 1.48,
"step": 61700
},
{
"epoch": 1.84,
"learning_rate": 1.1590963280877443e-05,
"loss": 1.451,
"step": 61800
},
{
"epoch": 1.84,
"learning_rate": 1.1561158798283262e-05,
"loss": 1.472,
"step": 61900
},
{
"epoch": 1.85,
"learning_rate": 1.153135431568908e-05,
"loss": 1.446,
"step": 62000
},
{
"epoch": 1.85,
"learning_rate": 1.1501549833094899e-05,
"loss": 1.4504,
"step": 62100
},
{
"epoch": 1.85,
"learning_rate": 1.1471745350500716e-05,
"loss": 1.4677,
"step": 62200
},
{
"epoch": 1.86,
"learning_rate": 1.1441940867906533e-05,
"loss": 1.4301,
"step": 62300
},
{
"epoch": 1.86,
"learning_rate": 1.141213638531235e-05,
"loss": 1.4207,
"step": 62400
},
{
"epoch": 1.86,
"learning_rate": 1.138233190271817e-05,
"loss": 1.4144,
"step": 62500
},
{
"epoch": 1.87,
"learning_rate": 1.1352527420123987e-05,
"loss": 1.4468,
"step": 62600
},
{
"epoch": 1.87,
"learning_rate": 1.1322722937529804e-05,
"loss": 1.4232,
"step": 62700
},
{
"epoch": 1.87,
"learning_rate": 1.1292918454935623e-05,
"loss": 1.4328,
"step": 62800
},
{
"epoch": 1.87,
"learning_rate": 1.126311397234144e-05,
"loss": 1.4364,
"step": 62900
},
{
"epoch": 1.88,
"learning_rate": 1.1233309489747258e-05,
"loss": 1.4485,
"step": 63000
},
{
"epoch": 1.88,
"learning_rate": 1.1203505007153075e-05,
"loss": 1.4054,
"step": 63100
},
{
"epoch": 1.88,
"learning_rate": 1.1173700524558894e-05,
"loss": 1.438,
"step": 63200
},
{
"epoch": 1.89,
"learning_rate": 1.1143896041964712e-05,
"loss": 1.4417,
"step": 63300
},
{
"epoch": 1.89,
"learning_rate": 1.1114091559370529e-05,
"loss": 1.5028,
"step": 63400
},
{
"epoch": 1.89,
"learning_rate": 1.1084287076776348e-05,
"loss": 1.424,
"step": 63500
},
{
"epoch": 1.9,
"learning_rate": 1.1054482594182165e-05,
"loss": 1.3832,
"step": 63600
},
{
"epoch": 1.9,
"learning_rate": 1.1024678111587983e-05,
"loss": 1.413,
"step": 63700
},
{
"epoch": 1.9,
"learning_rate": 1.09948736289938e-05,
"loss": 1.4512,
"step": 63800
},
{
"epoch": 1.9,
"learning_rate": 1.0965069146399619e-05,
"loss": 1.4262,
"step": 63900
},
{
"epoch": 1.91,
"learning_rate": 1.0935264663805436e-05,
"loss": 1.4745,
"step": 64000
},
{
"epoch": 1.91,
"learning_rate": 1.0905460181211254e-05,
"loss": 1.4105,
"step": 64100
},
{
"epoch": 1.91,
"learning_rate": 1.0875655698617073e-05,
"loss": 1.4641,
"step": 64200
},
{
"epoch": 1.92,
"learning_rate": 1.084585121602289e-05,
"loss": 1.4694,
"step": 64300
},
{
"epoch": 1.92,
"learning_rate": 1.0816046733428709e-05,
"loss": 1.4276,
"step": 64400
},
{
"epoch": 1.92,
"learning_rate": 1.0786242250834525e-05,
"loss": 1.4694,
"step": 64500
},
{
"epoch": 1.93,
"learning_rate": 1.0756437768240344e-05,
"loss": 1.4874,
"step": 64600
},
{
"epoch": 1.93,
"learning_rate": 1.0726633285646161e-05,
"loss": 1.4515,
"step": 64700
},
{
"epoch": 1.93,
"learning_rate": 1.069682880305198e-05,
"loss": 1.4539,
"step": 64800
},
{
"epoch": 1.93,
"learning_rate": 1.0667024320457797e-05,
"loss": 1.3871,
"step": 64900
},
{
"epoch": 1.94,
"learning_rate": 1.0637219837863615e-05,
"loss": 1.4109,
"step": 65000
},
{
"epoch": 1.94,
"learning_rate": 1.0607415355269434e-05,
"loss": 1.4384,
"step": 65100
},
{
"epoch": 1.94,
"learning_rate": 1.057761087267525e-05,
"loss": 1.4199,
"step": 65200
},
{
"epoch": 1.95,
"learning_rate": 1.0547806390081068e-05,
"loss": 1.46,
"step": 65300
},
{
"epoch": 1.95,
"learning_rate": 1.0518001907486886e-05,
"loss": 1.4389,
"step": 65400
},
{
"epoch": 1.95,
"learning_rate": 1.0488197424892705e-05,
"loss": 1.425,
"step": 65500
},
{
"epoch": 1.96,
"learning_rate": 1.0458392942298522e-05,
"loss": 1.4028,
"step": 65600
},
{
"epoch": 1.96,
"learning_rate": 1.042858845970434e-05,
"loss": 1.4536,
"step": 65700
},
{
"epoch": 1.96,
"learning_rate": 1.0398783977110158e-05,
"loss": 1.4986,
"step": 65800
},
{
"epoch": 1.96,
"learning_rate": 1.0368979494515976e-05,
"loss": 1.4005,
"step": 65900
},
{
"epoch": 1.97,
"learning_rate": 1.0339175011921793e-05,
"loss": 1.4286,
"step": 66000
},
{
"epoch": 1.97,
"learning_rate": 1.030937052932761e-05,
"loss": 1.4028,
"step": 66100
},
{
"epoch": 1.97,
"learning_rate": 1.027956604673343e-05,
"loss": 1.4589,
"step": 66200
},
{
"epoch": 1.98,
"learning_rate": 1.025005960896519e-05,
"loss": 1.4169,
"step": 66300
},
{
"epoch": 1.98,
"learning_rate": 1.0220255126371007e-05,
"loss": 1.4127,
"step": 66400
},
{
"epoch": 1.98,
"learning_rate": 1.0190450643776824e-05,
"loss": 1.4243,
"step": 66500
},
{
"epoch": 1.98,
"learning_rate": 1.0160646161182643e-05,
"loss": 1.3892,
"step": 66600
},
{
"epoch": 1.99,
"learning_rate": 1.0130841678588459e-05,
"loss": 1.4301,
"step": 66700
},
{
"epoch": 1.99,
"learning_rate": 1.0101037195994278e-05,
"loss": 1.4429,
"step": 66800
},
{
"epoch": 1.99,
"learning_rate": 1.0071232713400095e-05,
"loss": 1.4028,
"step": 66900
},
{
"epoch": 2.0,
"learning_rate": 1.0041428230805914e-05,
"loss": 1.4174,
"step": 67000
},
{
"epoch": 2.0,
"learning_rate": 1.0011623748211731e-05,
"loss": 1.3446,
"step": 67100
},
{
"epoch": 2.0,
"eval_gen_len": 18.9115,
"eval_loss": 1.5060008764266968,
"eval_rouge1": 32.1108,
"eval_rouge2": 17.1408,
"eval_rougeL": 27.7833,
"eval_rougeLsum": 27.7703,
"eval_runtime": 292.3222,
"eval_samples_per_second": 9.277,
"eval_steps_per_second": 1.16,
"step": 67104
},
{
"epoch": 2.0,
"learning_rate": 9.981819265617549e-06,
"loss": 1.3076,
"step": 67200
},
{
"epoch": 2.01,
"learning_rate": 9.952014783023368e-06,
"loss": 1.3116,
"step": 67300
},
{
"epoch": 2.01,
"learning_rate": 9.922210300429185e-06,
"loss": 1.352,
"step": 67400
},
{
"epoch": 2.01,
"learning_rate": 9.892405817835002e-06,
"loss": 1.3229,
"step": 67500
},
{
"epoch": 2.01,
"learning_rate": 9.86260133524082e-06,
"loss": 1.2791,
"step": 67600
},
{
"epoch": 2.02,
"learning_rate": 9.832796852646639e-06,
"loss": 1.3413,
"step": 67700
},
{
"epoch": 2.02,
"learning_rate": 9.802992370052456e-06,
"loss": 1.313,
"step": 67800
},
{
"epoch": 2.02,
"learning_rate": 9.773187887458275e-06,
"loss": 1.3097,
"step": 67900
},
{
"epoch": 2.03,
"learning_rate": 9.743383404864092e-06,
"loss": 1.3202,
"step": 68000
},
{
"epoch": 2.03,
"learning_rate": 9.71357892226991e-06,
"loss": 1.3114,
"step": 68100
},
{
"epoch": 2.03,
"learning_rate": 9.683774439675727e-06,
"loss": 1.2995,
"step": 68200
},
{
"epoch": 2.04,
"learning_rate": 9.653969957081544e-06,
"loss": 1.2941,
"step": 68300
},
{
"epoch": 2.04,
"learning_rate": 9.624165474487363e-06,
"loss": 1.3195,
"step": 68400
},
{
"epoch": 2.04,
"learning_rate": 9.59436099189318e-06,
"loss": 1.3223,
"step": 68500
},
{
"epoch": 2.04,
"learning_rate": 9.564556509299e-06,
"loss": 1.2635,
"step": 68600
},
{
"epoch": 2.05,
"learning_rate": 9.534752026704817e-06,
"loss": 1.3237,
"step": 68700
},
{
"epoch": 2.05,
"learning_rate": 9.504947544110634e-06,
"loss": 1.3631,
"step": 68800
},
{
"epoch": 2.05,
"learning_rate": 9.475143061516453e-06,
"loss": 1.3345,
"step": 68900
},
{
"epoch": 2.06,
"learning_rate": 9.445338578922269e-06,
"loss": 1.2867,
"step": 69000
},
{
"epoch": 2.06,
"learning_rate": 9.415534096328088e-06,
"loss": 1.2966,
"step": 69100
},
{
"epoch": 2.06,
"learning_rate": 9.385729613733905e-06,
"loss": 1.3146,
"step": 69200
},
{
"epoch": 2.07,
"learning_rate": 9.355925131139724e-06,
"loss": 1.3152,
"step": 69300
},
{
"epoch": 2.07,
"learning_rate": 9.326120648545542e-06,
"loss": 1.2995,
"step": 69400
},
{
"epoch": 2.07,
"learning_rate": 9.29631616595136e-06,
"loss": 1.3113,
"step": 69500
},
{
"epoch": 2.07,
"learning_rate": 9.266511683357178e-06,
"loss": 1.3072,
"step": 69600
},
{
"epoch": 2.08,
"learning_rate": 9.236707200762994e-06,
"loss": 1.2986,
"step": 69700
},
{
"epoch": 2.08,
"learning_rate": 9.206902718168813e-06,
"loss": 1.3205,
"step": 69800
},
{
"epoch": 2.08,
"learning_rate": 9.17709823557463e-06,
"loss": 1.3499,
"step": 69900
},
{
"epoch": 2.09,
"learning_rate": 9.147293752980449e-06,
"loss": 1.3307,
"step": 70000
},
{
"epoch": 2.09,
"learning_rate": 9.117489270386266e-06,
"loss": 1.3387,
"step": 70100
},
{
"epoch": 2.09,
"learning_rate": 9.087684787792085e-06,
"loss": 1.3261,
"step": 70200
},
{
"epoch": 2.1,
"learning_rate": 9.058178350023845e-06,
"loss": 1.2811,
"step": 70300
},
{
"epoch": 2.1,
"learning_rate": 9.028373867429663e-06,
"loss": 1.2962,
"step": 70400
},
{
"epoch": 2.1,
"learning_rate": 8.998569384835478e-06,
"loss": 1.3124,
"step": 70500
},
{
"epoch": 2.1,
"learning_rate": 8.968764902241297e-06,
"loss": 1.3171,
"step": 70600
},
{
"epoch": 2.11,
"learning_rate": 8.938960419647115e-06,
"loss": 1.3073,
"step": 70700
},
{
"epoch": 2.11,
"learning_rate": 8.909155937052934e-06,
"loss": 1.3202,
"step": 70800
},
{
"epoch": 2.11,
"learning_rate": 8.879351454458751e-06,
"loss": 1.3314,
"step": 70900
},
{
"epoch": 2.12,
"learning_rate": 8.84954697186457e-06,
"loss": 1.3105,
"step": 71000
},
{
"epoch": 2.12,
"learning_rate": 8.819742489270387e-06,
"loss": 1.3241,
"step": 71100
},
{
"epoch": 2.12,
"learning_rate": 8.789938006676203e-06,
"loss": 1.3228,
"step": 71200
},
{
"epoch": 2.13,
"learning_rate": 8.760133524082022e-06,
"loss": 1.3377,
"step": 71300
},
{
"epoch": 2.13,
"learning_rate": 8.73032904148784e-06,
"loss": 1.263,
"step": 71400
},
{
"epoch": 2.13,
"learning_rate": 8.700524558893658e-06,
"loss": 1.2543,
"step": 71500
},
{
"epoch": 2.13,
"learning_rate": 8.670720076299476e-06,
"loss": 1.3436,
"step": 71600
},
{
"epoch": 2.14,
"learning_rate": 8.640915593705295e-06,
"loss": 1.3391,
"step": 71700
},
{
"epoch": 2.14,
"learning_rate": 8.611111111111112e-06,
"loss": 1.3108,
"step": 71800
},
{
"epoch": 2.14,
"learning_rate": 8.58130662851693e-06,
"loss": 1.2825,
"step": 71900
},
{
"epoch": 2.15,
"learning_rate": 8.551502145922747e-06,
"loss": 1.3004,
"step": 72000
},
{
"epoch": 2.15,
"learning_rate": 8.521697663328564e-06,
"loss": 1.312,
"step": 72100
},
{
"epoch": 2.15,
"learning_rate": 8.491893180734383e-06,
"loss": 1.3004,
"step": 72200
},
{
"epoch": 2.15,
"learning_rate": 8.4620886981402e-06,
"loss": 1.3638,
"step": 72300
},
{
"epoch": 2.16,
"learning_rate": 8.43228421554602e-06,
"loss": 1.3283,
"step": 72400
},
{
"epoch": 2.16,
"learning_rate": 8.402479732951837e-06,
"loss": 1.2835,
"step": 72500
},
{
"epoch": 2.16,
"learning_rate": 8.372675250357656e-06,
"loss": 1.3222,
"step": 72600
},
{
"epoch": 2.17,
"learning_rate": 8.342870767763471e-06,
"loss": 1.3031,
"step": 72700
},
{
"epoch": 2.17,
"learning_rate": 8.313066285169288e-06,
"loss": 1.3239,
"step": 72800
},
{
"epoch": 2.17,
"learning_rate": 8.283261802575108e-06,
"loss": 1.3093,
"step": 72900
},
{
"epoch": 2.18,
"learning_rate": 8.253457319980925e-06,
"loss": 1.3053,
"step": 73000
},
{
"epoch": 2.18,
"learning_rate": 8.223652837386744e-06,
"loss": 1.3409,
"step": 73100
},
{
"epoch": 2.18,
"learning_rate": 8.193848354792561e-06,
"loss": 1.3406,
"step": 73200
},
{
"epoch": 2.18,
"learning_rate": 8.16404387219838e-06,
"loss": 1.3051,
"step": 73300
},
{
"epoch": 2.19,
"learning_rate": 8.134239389604196e-06,
"loss": 1.2662,
"step": 73400
},
{
"epoch": 2.19,
"learning_rate": 8.104434907010013e-06,
"loss": 1.3195,
"step": 73500
},
{
"epoch": 2.19,
"learning_rate": 8.074630424415832e-06,
"loss": 1.2922,
"step": 73600
},
{
"epoch": 2.2,
"learning_rate": 8.04482594182165e-06,
"loss": 1.3208,
"step": 73700
},
{
"epoch": 2.2,
"learning_rate": 8.015021459227469e-06,
"loss": 1.3047,
"step": 73800
},
{
"epoch": 2.2,
"learning_rate": 7.985216976633286e-06,
"loss": 1.3148,
"step": 73900
},
{
"epoch": 2.21,
"learning_rate": 7.955412494039105e-06,
"loss": 1.2982,
"step": 74000
},
{
"epoch": 2.21,
"learning_rate": 7.925608011444922e-06,
"loss": 1.323,
"step": 74100
},
{
"epoch": 2.21,
"learning_rate": 7.895803528850738e-06,
"loss": 1.2963,
"step": 74200
},
{
"epoch": 2.21,
"learning_rate": 7.865999046256557e-06,
"loss": 1.2981,
"step": 74300
},
{
"epoch": 2.22,
"learning_rate": 7.836194563662374e-06,
"loss": 1.2817,
"step": 74400
},
{
"epoch": 2.22,
"learning_rate": 7.806390081068193e-06,
"loss": 1.3061,
"step": 74500
},
{
"epoch": 2.22,
"learning_rate": 7.77658559847401e-06,
"loss": 1.302,
"step": 74600
},
{
"epoch": 2.23,
"learning_rate": 7.74678111587983e-06,
"loss": 1.3064,
"step": 74700
},
{
"epoch": 2.23,
"learning_rate": 7.716976633285647e-06,
"loss": 1.2704,
"step": 74800
},
{
"epoch": 2.23,
"learning_rate": 7.687172150691464e-06,
"loss": 1.2871,
"step": 74900
},
{
"epoch": 2.24,
"learning_rate": 7.657367668097281e-06,
"loss": 1.3032,
"step": 75000
},
{
"epoch": 2.24,
"learning_rate": 7.627563185503099e-06,
"loss": 1.3276,
"step": 75100
},
{
"epoch": 2.24,
"learning_rate": 7.597758702908918e-06,
"loss": 1.2922,
"step": 75200
},
{
"epoch": 2.24,
"learning_rate": 7.567954220314735e-06,
"loss": 1.3013,
"step": 75300
},
{
"epoch": 2.25,
"learning_rate": 7.538149737720554e-06,
"loss": 1.3174,
"step": 75400
},
{
"epoch": 2.25,
"learning_rate": 7.508345255126371e-06,
"loss": 1.3155,
"step": 75500
},
{
"epoch": 2.25,
"learning_rate": 7.478540772532189e-06,
"loss": 1.3232,
"step": 75600
},
{
"epoch": 2.26,
"learning_rate": 7.448736289938007e-06,
"loss": 1.2449,
"step": 75700
},
{
"epoch": 2.26,
"learning_rate": 7.418931807343825e-06,
"loss": 1.297,
"step": 75800
},
{
"epoch": 2.26,
"learning_rate": 7.3891273247496425e-06,
"loss": 1.3049,
"step": 75900
},
{
"epoch": 2.27,
"learning_rate": 7.359322842155461e-06,
"loss": 1.2848,
"step": 76000
},
{
"epoch": 2.27,
"learning_rate": 7.329816404387221e-06,
"loss": 1.2833,
"step": 76100
},
{
"epoch": 2.27,
"learning_rate": 7.300011921793038e-06,
"loss": 1.281,
"step": 76200
},
{
"epoch": 2.27,
"learning_rate": 7.270207439198855e-06,
"loss": 1.3107,
"step": 76300
},
{
"epoch": 2.28,
"learning_rate": 7.240402956604673e-06,
"loss": 1.2745,
"step": 76400
},
{
"epoch": 2.28,
"learning_rate": 7.210598474010492e-06,
"loss": 1.3614,
"step": 76500
},
{
"epoch": 2.28,
"learning_rate": 7.180793991416309e-06,
"loss": 1.2813,
"step": 76600
},
{
"epoch": 2.29,
"learning_rate": 7.150989508822127e-06,
"loss": 1.3131,
"step": 76700
},
{
"epoch": 2.29,
"learning_rate": 7.121185026227945e-06,
"loss": 1.2976,
"step": 76800
},
{
"epoch": 2.29,
"learning_rate": 7.091380543633763e-06,
"loss": 1.3525,
"step": 76900
},
{
"epoch": 2.29,
"learning_rate": 7.06157606103958e-06,
"loss": 1.3217,
"step": 77000
},
{
"epoch": 2.3,
"learning_rate": 7.031771578445398e-06,
"loss": 1.2728,
"step": 77100
},
{
"epoch": 2.3,
"learning_rate": 7.001967095851216e-06,
"loss": 1.3291,
"step": 77200
},
{
"epoch": 2.3,
"learning_rate": 6.972162613257034e-06,
"loss": 1.3162,
"step": 77300
},
{
"epoch": 2.31,
"learning_rate": 6.942358130662852e-06,
"loss": 1.273,
"step": 77400
},
{
"epoch": 2.31,
"learning_rate": 6.91255364806867e-06,
"loss": 1.2917,
"step": 77500
},
{
"epoch": 2.31,
"learning_rate": 6.882749165474488e-06,
"loss": 1.3109,
"step": 77600
},
{
"epoch": 2.32,
"learning_rate": 6.8529446828803046e-06,
"loss": 1.3239,
"step": 77700
},
{
"epoch": 2.32,
"learning_rate": 6.823140200286123e-06,
"loss": 1.3042,
"step": 77800
},
{
"epoch": 2.32,
"learning_rate": 6.793335717691941e-06,
"loss": 1.2799,
"step": 77900
},
{
"epoch": 2.32,
"learning_rate": 6.763531235097759e-06,
"loss": 1.3083,
"step": 78000
},
{
"epoch": 2.33,
"learning_rate": 6.733726752503576e-06,
"loss": 1.2967,
"step": 78100
},
{
"epoch": 2.33,
"learning_rate": 6.7039222699093946e-06,
"loss": 1.3231,
"step": 78200
},
{
"epoch": 2.33,
"learning_rate": 6.674117787315213e-06,
"loss": 1.3679,
"step": 78300
},
{
"epoch": 2.34,
"learning_rate": 6.644313304721031e-06,
"loss": 1.3178,
"step": 78400
},
{
"epoch": 2.34,
"learning_rate": 6.614508822126847e-06,
"loss": 1.305,
"step": 78500
},
{
"epoch": 2.34,
"learning_rate": 6.5847043395326656e-06,
"loss": 1.329,
"step": 78600
},
{
"epoch": 2.35,
"learning_rate": 6.554899856938484e-06,
"loss": 1.3147,
"step": 78700
},
{
"epoch": 2.35,
"learning_rate": 6.525095374344301e-06,
"loss": 1.3129,
"step": 78800
},
{
"epoch": 2.35,
"learning_rate": 6.495290891750119e-06,
"loss": 1.295,
"step": 78900
},
{
"epoch": 2.35,
"learning_rate": 6.465486409155937e-06,
"loss": 1.3236,
"step": 79000
},
{
"epoch": 2.36,
"learning_rate": 6.4356819265617556e-06,
"loss": 1.3012,
"step": 79100
},
{
"epoch": 2.36,
"learning_rate": 6.405877443967573e-06,
"loss": 1.2792,
"step": 79200
},
{
"epoch": 2.36,
"learning_rate": 6.37607296137339e-06,
"loss": 1.3223,
"step": 79300
},
{
"epoch": 2.37,
"learning_rate": 6.346268478779208e-06,
"loss": 1.3346,
"step": 79400
},
{
"epoch": 2.37,
"learning_rate": 6.3164639961850266e-06,
"loss": 1.3006,
"step": 79500
},
{
"epoch": 2.37,
"learning_rate": 6.286659513590844e-06,
"loss": 1.3093,
"step": 79600
},
{
"epoch": 2.38,
"learning_rate": 6.256855030996662e-06,
"loss": 1.33,
"step": 79700
},
{
"epoch": 2.38,
"learning_rate": 6.22705054840248e-06,
"loss": 1.3127,
"step": 79800
},
{
"epoch": 2.38,
"learning_rate": 6.197246065808298e-06,
"loss": 1.2684,
"step": 79900
},
{
"epoch": 2.38,
"learning_rate": 6.167441583214116e-06,
"loss": 1.2938,
"step": 80000
},
{
"epoch": 2.39,
"learning_rate": 6.137637100619933e-06,
"loss": 1.3103,
"step": 80100
},
{
"epoch": 2.39,
"learning_rate": 6.107832618025751e-06,
"loss": 1.307,
"step": 80200
},
{
"epoch": 2.39,
"learning_rate": 6.0780281354315685e-06,
"loss": 1.327,
"step": 80300
},
{
"epoch": 2.4,
"learning_rate": 6.048223652837387e-06,
"loss": 1.292,
"step": 80400
},
{
"epoch": 2.4,
"learning_rate": 6.018419170243205e-06,
"loss": 1.3067,
"step": 80500
},
{
"epoch": 2.4,
"learning_rate": 5.988614687649023e-06,
"loss": 1.2973,
"step": 80600
},
{
"epoch": 2.41,
"learning_rate": 5.95881020505484e-06,
"loss": 1.3501,
"step": 80700
},
{
"epoch": 2.41,
"learning_rate": 5.9290057224606586e-06,
"loss": 1.2745,
"step": 80800
},
{
"epoch": 2.41,
"learning_rate": 5.899201239866476e-06,
"loss": 1.3246,
"step": 80900
},
{
"epoch": 2.41,
"learning_rate": 5.869694802098236e-06,
"loss": 1.3207,
"step": 81000
},
{
"epoch": 2.42,
"learning_rate": 5.839890319504053e-06,
"loss": 1.2823,
"step": 81100
},
{
"epoch": 2.42,
"learning_rate": 5.810085836909871e-06,
"loss": 1.307,
"step": 81200
},
{
"epoch": 2.42,
"learning_rate": 5.7802813543156895e-06,
"loss": 1.3289,
"step": 81300
},
{
"epoch": 2.43,
"learning_rate": 5.750476871721508e-06,
"loss": 1.271,
"step": 81400
},
{
"epoch": 2.43,
"learning_rate": 5.720672389127325e-06,
"loss": 1.3111,
"step": 81500
},
{
"epoch": 2.43,
"learning_rate": 5.690867906533142e-06,
"loss": 1.3137,
"step": 81600
},
{
"epoch": 2.44,
"learning_rate": 5.6610634239389605e-06,
"loss": 1.3168,
"step": 81700
},
{
"epoch": 2.44,
"learning_rate": 5.631258941344778e-06,
"loss": 1.3243,
"step": 81800
},
{
"epoch": 2.44,
"learning_rate": 5.601454458750596e-06,
"loss": 1.2846,
"step": 81900
},
{
"epoch": 2.44,
"learning_rate": 5.571649976156414e-06,
"loss": 1.3359,
"step": 82000
},
{
"epoch": 2.45,
"learning_rate": 5.541845493562232e-06,
"loss": 1.312,
"step": 82100
},
{
"epoch": 2.45,
"learning_rate": 5.51204101096805e-06,
"loss": 1.304,
"step": 82200
},
{
"epoch": 2.45,
"learning_rate": 5.482236528373868e-06,
"loss": 1.3005,
"step": 82300
},
{
"epoch": 2.46,
"learning_rate": 5.452432045779685e-06,
"loss": 1.3196,
"step": 82400
},
{
"epoch": 2.46,
"learning_rate": 5.422925608011445e-06,
"loss": 1.325,
"step": 82500
},
{
"epoch": 2.46,
"learning_rate": 5.393121125417262e-06,
"loss": 1.2993,
"step": 82600
},
{
"epoch": 2.46,
"learning_rate": 5.3633166428230805e-06,
"loss": 1.3078,
"step": 82700
},
{
"epoch": 2.47,
"learning_rate": 5.333512160228899e-06,
"loss": 1.2723,
"step": 82800
},
{
"epoch": 2.47,
"learning_rate": 5.303707677634717e-06,
"loss": 1.3061,
"step": 82900
},
{
"epoch": 2.47,
"learning_rate": 5.273903195040534e-06,
"loss": 1.2831,
"step": 83000
},
{
"epoch": 2.48,
"learning_rate": 5.244098712446352e-06,
"loss": 1.3367,
"step": 83100
},
{
"epoch": 2.48,
"learning_rate": 5.21429422985217e-06,
"loss": 1.3299,
"step": 83200
},
{
"epoch": 2.48,
"learning_rate": 5.184489747257988e-06,
"loss": 1.2967,
"step": 83300
},
{
"epoch": 2.49,
"learning_rate": 5.154685264663805e-06,
"loss": 1.2826,
"step": 83400
},
{
"epoch": 2.49,
"learning_rate": 5.124880782069623e-06,
"loss": 1.2924,
"step": 83500
},
{
"epoch": 2.49,
"learning_rate": 5.0950762994754415e-06,
"loss": 1.287,
"step": 83600
},
{
"epoch": 2.49,
"learning_rate": 5.065271816881259e-06,
"loss": 1.3353,
"step": 83700
},
{
"epoch": 2.5,
"learning_rate": 5.035467334287077e-06,
"loss": 1.2794,
"step": 83800
},
{
"epoch": 2.5,
"learning_rate": 5.005960896518837e-06,
"loss": 1.2811,
"step": 83900
},
{
"epoch": 2.5,
"learning_rate": 4.976156413924654e-06,
"loss": 1.2638,
"step": 84000
},
{
"epoch": 2.51,
"learning_rate": 4.946351931330472e-06,
"loss": 1.34,
"step": 84100
},
{
"epoch": 2.51,
"learning_rate": 4.91654744873629e-06,
"loss": 1.2886,
"step": 84200
},
{
"epoch": 2.51,
"learning_rate": 4.886742966142108e-06,
"loss": 1.2936,
"step": 84300
},
{
"epoch": 2.52,
"learning_rate": 4.856938483547926e-06,
"loss": 1.2905,
"step": 84400
},
{
"epoch": 2.52,
"learning_rate": 4.8271340009537434e-06,
"loss": 1.3039,
"step": 84500
},
{
"epoch": 2.52,
"learning_rate": 4.797329518359562e-06,
"loss": 1.2702,
"step": 84600
},
{
"epoch": 2.52,
"learning_rate": 4.76752503576538e-06,
"loss": 1.3245,
"step": 84700
},
{
"epoch": 2.53,
"learning_rate": 4.737720553171197e-06,
"loss": 1.298,
"step": 84800
},
{
"epoch": 2.53,
"learning_rate": 4.7079160705770144e-06,
"loss": 1.3183,
"step": 84900
},
{
"epoch": 2.53,
"learning_rate": 4.678111587982833e-06,
"loss": 1.2971,
"step": 85000
},
{
"epoch": 2.54,
"learning_rate": 4.648307105388651e-06,
"loss": 1.2877,
"step": 85100
},
{
"epoch": 2.54,
"learning_rate": 4.618502622794468e-06,
"loss": 1.2796,
"step": 85200
},
{
"epoch": 2.54,
"learning_rate": 4.588698140200286e-06,
"loss": 1.325,
"step": 85300
},
{
"epoch": 2.55,
"learning_rate": 4.5588936576061044e-06,
"loss": 1.3102,
"step": 85400
},
{
"epoch": 2.55,
"learning_rate": 4.529089175011923e-06,
"loss": 1.3211,
"step": 85500
},
{
"epoch": 2.55,
"learning_rate": 4.499284692417739e-06,
"loss": 1.2872,
"step": 85600
},
{
"epoch": 2.55,
"learning_rate": 4.469480209823557e-06,
"loss": 1.3031,
"step": 85700
},
{
"epoch": 2.56,
"learning_rate": 4.4396757272293754e-06,
"loss": 1.3126,
"step": 85800
},
{
"epoch": 2.56,
"learning_rate": 4.409871244635194e-06,
"loss": 1.3138,
"step": 85900
},
{
"epoch": 2.56,
"learning_rate": 4.380066762041011e-06,
"loss": 1.3186,
"step": 86000
},
{
"epoch": 2.57,
"learning_rate": 4.350262279446829e-06,
"loss": 1.3243,
"step": 86100
},
{
"epoch": 2.57,
"learning_rate": 4.320457796852647e-06,
"loss": 1.2839,
"step": 86200
},
{
"epoch": 2.57,
"learning_rate": 4.290653314258465e-06,
"loss": 1.266,
"step": 86300
},
{
"epoch": 2.58,
"learning_rate": 4.260848831664282e-06,
"loss": 1.2661,
"step": 86400
},
{
"epoch": 2.58,
"learning_rate": 4.2310443490701e-06,
"loss": 1.2955,
"step": 86500
},
{
"epoch": 2.58,
"learning_rate": 4.201239866475918e-06,
"loss": 1.2725,
"step": 86600
},
{
"epoch": 2.58,
"learning_rate": 4.171435383881736e-06,
"loss": 1.3274,
"step": 86700
},
{
"epoch": 2.59,
"learning_rate": 4.141630901287554e-06,
"loss": 1.291,
"step": 86800
},
{
"epoch": 2.59,
"learning_rate": 4.111826418693372e-06,
"loss": 1.313,
"step": 86900
},
{
"epoch": 2.59,
"learning_rate": 4.08202193609919e-06,
"loss": 1.3116,
"step": 87000
},
{
"epoch": 2.6,
"learning_rate": 4.052217453505007e-06,
"loss": 1.2802,
"step": 87100
},
{
"epoch": 2.6,
"learning_rate": 4.022412970910825e-06,
"loss": 1.3158,
"step": 87200
},
{
"epoch": 2.6,
"learning_rate": 3.992608488316643e-06,
"loss": 1.2753,
"step": 87300
},
{
"epoch": 2.6,
"learning_rate": 3.962804005722461e-06,
"loss": 1.2926,
"step": 87400
},
{
"epoch": 2.61,
"learning_rate": 3.932999523128278e-06,
"loss": 1.2848,
"step": 87500
},
{
"epoch": 2.61,
"learning_rate": 3.903195040534097e-06,
"loss": 1.3019,
"step": 87600
},
{
"epoch": 2.61,
"learning_rate": 3.873390557939915e-06,
"loss": 1.3003,
"step": 87700
},
{
"epoch": 2.62,
"learning_rate": 3.843586075345732e-06,
"loss": 1.2979,
"step": 87800
},
{
"epoch": 2.62,
"learning_rate": 3.8137815927515494e-06,
"loss": 1.2929,
"step": 87900
},
{
"epoch": 2.62,
"learning_rate": 3.7839771101573676e-06,
"loss": 1.2944,
"step": 88000
},
{
"epoch": 2.63,
"learning_rate": 3.7541726275631853e-06,
"loss": 1.2952,
"step": 88100
},
{
"epoch": 2.63,
"learning_rate": 3.7243681449690035e-06,
"loss": 1.3206,
"step": 88200
},
{
"epoch": 2.63,
"learning_rate": 3.6945636623748213e-06,
"loss": 1.272,
"step": 88300
},
{
"epoch": 2.63,
"learning_rate": 3.6647591797806394e-06,
"loss": 1.3305,
"step": 88400
},
{
"epoch": 2.64,
"learning_rate": 3.6349546971864567e-06,
"loss": 1.2863,
"step": 88500
},
{
"epoch": 2.64,
"learning_rate": 3.605150214592275e-06,
"loss": 1.3274,
"step": 88600
},
{
"epoch": 2.64,
"learning_rate": 3.5753457319980927e-06,
"loss": 1.2402,
"step": 88700
},
{
"epoch": 2.65,
"learning_rate": 3.545541249403911e-06,
"loss": 1.2966,
"step": 88800
},
{
"epoch": 2.65,
"learning_rate": 3.515736766809728e-06,
"loss": 1.2915,
"step": 88900
},
{
"epoch": 2.65,
"learning_rate": 3.485932284215546e-06,
"loss": 1.2973,
"step": 89000
},
{
"epoch": 2.66,
"learning_rate": 3.456127801621364e-06,
"loss": 1.2655,
"step": 89100
},
{
"epoch": 2.66,
"learning_rate": 3.4263233190271814e-06,
"loss": 1.305,
"step": 89200
},
{
"epoch": 2.66,
"learning_rate": 3.3965188364329996e-06,
"loss": 1.2942,
"step": 89300
},
{
"epoch": 2.66,
"learning_rate": 3.3667143538388173e-06,
"loss": 1.2901,
"step": 89400
},
{
"epoch": 2.67,
"learning_rate": 3.3369098712446355e-06,
"loss": 1.2841,
"step": 89500
},
{
"epoch": 2.67,
"learning_rate": 3.307105388650453e-06,
"loss": 1.3179,
"step": 89600
},
{
"epoch": 2.67,
"learning_rate": 3.277300906056271e-06,
"loss": 1.338,
"step": 89700
},
{
"epoch": 2.68,
"learning_rate": 3.2474964234620887e-06,
"loss": 1.2943,
"step": 89800
},
{
"epoch": 2.68,
"learning_rate": 3.217691940867907e-06,
"loss": 1.3189,
"step": 89900
},
{
"epoch": 2.68,
"learning_rate": 3.1878874582737242e-06,
"loss": 1.3266,
"step": 90000
},
{
"epoch": 2.69,
"learning_rate": 3.1580829756795424e-06,
"loss": 1.3228,
"step": 90100
},
{
"epoch": 2.69,
"learning_rate": 3.12827849308536e-06,
"loss": 1.297,
"step": 90200
},
{
"epoch": 2.69,
"learning_rate": 3.098474010491178e-06,
"loss": 1.2824,
"step": 90300
},
{
"epoch": 2.69,
"learning_rate": 3.0686695278969957e-06,
"loss": 1.3217,
"step": 90400
},
{
"epoch": 2.7,
"learning_rate": 3.0388650453028134e-06,
"loss": 1.2884,
"step": 90500
},
{
"epoch": 2.7,
"learning_rate": 3.0090605627086316e-06,
"loss": 1.2947,
"step": 90600
},
{
"epoch": 2.7,
"learning_rate": 2.9792560801144493e-06,
"loss": 1.2985,
"step": 90700
},
{
"epoch": 2.71,
"learning_rate": 2.949451597520267e-06,
"loss": 1.3063,
"step": 90800
},
{
"epoch": 2.71,
"learning_rate": 2.919647114926085e-06,
"loss": 1.2729,
"step": 90900
},
{
"epoch": 2.71,
"learning_rate": 2.889842632331903e-06,
"loss": 1.3206,
"step": 91000
},
{
"epoch": 2.72,
"learning_rate": 2.8600381497377207e-06,
"loss": 1.2862,
"step": 91100
},
{
"epoch": 2.72,
"learning_rate": 2.8302336671435385e-06,
"loss": 1.3047,
"step": 91200
},
{
"epoch": 2.72,
"learning_rate": 2.8004291845493562e-06,
"loss": 1.3283,
"step": 91300
},
{
"epoch": 2.72,
"learning_rate": 2.7706247019551744e-06,
"loss": 1.3355,
"step": 91400
},
{
"epoch": 2.73,
"learning_rate": 2.740820219360992e-06,
"loss": 1.294,
"step": 91500
},
{
"epoch": 2.73,
"learning_rate": 2.7110157367668095e-06,
"loss": 1.2771,
"step": 91600
},
{
"epoch": 2.73,
"learning_rate": 2.6812112541726277e-06,
"loss": 1.2671,
"step": 91700
},
{
"epoch": 2.74,
"learning_rate": 2.6514067715784454e-06,
"loss": 1.2855,
"step": 91800
},
{
"epoch": 2.74,
"learning_rate": 2.6216022889842636e-06,
"loss": 1.2664,
"step": 91900
},
{
"epoch": 2.74,
"learning_rate": 2.5920958512160226e-06,
"loss": 1.3065,
"step": 92000
},
{
"epoch": 2.74,
"learning_rate": 2.562291368621841e-06,
"loss": 1.2835,
"step": 92100
},
{
"epoch": 2.75,
"learning_rate": 2.5324868860276586e-06,
"loss": 1.2752,
"step": 92200
},
{
"epoch": 2.75,
"learning_rate": 2.5026824034334767e-06,
"loss": 1.3331,
"step": 92300
},
{
"epoch": 2.75,
"learning_rate": 2.472877920839294e-06,
"loss": 1.2784,
"step": 92400
},
{
"epoch": 2.76,
"learning_rate": 2.4430734382451122e-06,
"loss": 1.3154,
"step": 92500
},
{
"epoch": 2.76,
"learning_rate": 2.41326895565093e-06,
"loss": 1.2989,
"step": 92600
},
{
"epoch": 2.76,
"learning_rate": 2.383464473056748e-06,
"loss": 1.2854,
"step": 92700
},
{
"epoch": 2.77,
"learning_rate": 2.3536599904625655e-06,
"loss": 1.3367,
"step": 92800
},
{
"epoch": 2.77,
"learning_rate": 2.3238555078683836e-06,
"loss": 1.2928,
"step": 92900
},
{
"epoch": 2.77,
"learning_rate": 2.2940510252742014e-06,
"loss": 1.2888,
"step": 93000
},
{
"epoch": 2.77,
"learning_rate": 2.264246542680019e-06,
"loss": 1.2786,
"step": 93100
},
{
"epoch": 2.78,
"learning_rate": 2.234442060085837e-06,
"loss": 1.2806,
"step": 93200
},
{
"epoch": 2.78,
"learning_rate": 2.2046375774916546e-06,
"loss": 1.3047,
"step": 93300
},
{
"epoch": 2.78,
"learning_rate": 2.174833094897473e-06,
"loss": 1.2596,
"step": 93400
},
{
"epoch": 2.79,
"learning_rate": 2.14502861230329e-06,
"loss": 1.3129,
"step": 93500
},
{
"epoch": 2.79,
"learning_rate": 2.1152241297091083e-06,
"loss": 1.3005,
"step": 93600
},
{
"epoch": 2.79,
"learning_rate": 2.085419647114926e-06,
"loss": 1.2638,
"step": 93700
},
{
"epoch": 2.8,
"learning_rate": 2.0556151645207442e-06,
"loss": 1.305,
"step": 93800
},
{
"epoch": 2.8,
"learning_rate": 2.0258106819265615e-06,
"loss": 1.2499,
"step": 93900
},
{
"epoch": 2.8,
"learning_rate": 1.9960061993323797e-06,
"loss": 1.2803,
"step": 94000
},
{
"epoch": 2.8,
"learning_rate": 1.9662017167381975e-06,
"loss": 1.2982,
"step": 94100
},
{
"epoch": 2.81,
"learning_rate": 1.9363972341440156e-06,
"loss": 1.2846,
"step": 94200
},
{
"epoch": 2.81,
"learning_rate": 1.906592751549833e-06,
"loss": 1.3006,
"step": 94300
},
{
"epoch": 2.81,
"learning_rate": 1.876788268955651e-06,
"loss": 1.2996,
"step": 94400
},
{
"epoch": 2.82,
"learning_rate": 1.8472818311874106e-06,
"loss": 1.2905,
"step": 94500
},
{
"epoch": 2.82,
"learning_rate": 1.8174773485932284e-06,
"loss": 1.2661,
"step": 94600
},
{
"epoch": 2.82,
"learning_rate": 1.7876728659990463e-06,
"loss": 1.2848,
"step": 94700
},
{
"epoch": 2.83,
"learning_rate": 1.757868383404864e-06,
"loss": 1.2962,
"step": 94800
},
{
"epoch": 2.83,
"learning_rate": 1.728063900810682e-06,
"loss": 1.3271,
"step": 94900
},
{
"epoch": 2.83,
"learning_rate": 1.6982594182164998e-06,
"loss": 1.2668,
"step": 95000
},
{
"epoch": 2.83,
"learning_rate": 1.6684549356223177e-06,
"loss": 1.3104,
"step": 95100
},
{
"epoch": 2.84,
"learning_rate": 1.6386504530281355e-06,
"loss": 1.2795,
"step": 95200
},
{
"epoch": 2.84,
"learning_rate": 1.6088459704339535e-06,
"loss": 1.2807,
"step": 95300
},
{
"epoch": 2.84,
"learning_rate": 1.5790414878397712e-06,
"loss": 1.3023,
"step": 95400
},
{
"epoch": 2.85,
"learning_rate": 1.549237005245589e-06,
"loss": 1.2996,
"step": 95500
},
{
"epoch": 2.85,
"learning_rate": 1.5194325226514067e-06,
"loss": 1.3163,
"step": 95600
},
{
"epoch": 2.85,
"learning_rate": 1.4896280400572247e-06,
"loss": 1.3352,
"step": 95700
},
{
"epoch": 2.86,
"learning_rate": 1.4598235574630424e-06,
"loss": 1.2638,
"step": 95800
},
{
"epoch": 2.86,
"learning_rate": 1.4300190748688604e-06,
"loss": 1.3498,
"step": 95900
},
{
"epoch": 2.86,
"learning_rate": 1.4002145922746781e-06,
"loss": 1.2619,
"step": 96000
},
{
"epoch": 2.86,
"learning_rate": 1.370410109680496e-06,
"loss": 1.3227,
"step": 96100
},
{
"epoch": 2.87,
"learning_rate": 1.3406056270863138e-06,
"loss": 1.2838,
"step": 96200
},
{
"epoch": 2.87,
"learning_rate": 1.3108011444921318e-06,
"loss": 1.2992,
"step": 96300
},
{
"epoch": 2.87,
"learning_rate": 1.2809966618979495e-06,
"loss": 1.272,
"step": 96400
},
{
"epoch": 2.88,
"learning_rate": 1.2511921793037675e-06,
"loss": 1.2672,
"step": 96500
},
{
"epoch": 2.88,
"learning_rate": 1.2213876967095852e-06,
"loss": 1.307,
"step": 96600
},
{
"epoch": 2.88,
"learning_rate": 1.191881258941345e-06,
"loss": 1.2649,
"step": 96700
},
{
"epoch": 2.89,
"learning_rate": 1.1620767763471627e-06,
"loss": 1.2454,
"step": 96800
},
{
"epoch": 2.89,
"learning_rate": 1.1322722937529807e-06,
"loss": 1.2826,
"step": 96900
},
{
"epoch": 2.89,
"learning_rate": 1.1024678111587984e-06,
"loss": 1.3654,
"step": 97000
},
{
"epoch": 2.89,
"learning_rate": 1.0726633285646161e-06,
"loss": 1.2908,
"step": 97100
},
{
"epoch": 2.9,
"learning_rate": 1.042858845970434e-06,
"loss": 1.302,
"step": 97200
},
{
"epoch": 2.9,
"learning_rate": 1.0130543633762516e-06,
"loss": 1.2849,
"step": 97300
},
{
"epoch": 2.9,
"learning_rate": 9.832498807820696e-07,
"loss": 1.2789,
"step": 97400
},
{
"epoch": 2.91,
"learning_rate": 9.537434430138293e-07,
"loss": 1.2867,
"step": 97500
},
{
"epoch": 2.91,
"learning_rate": 9.239389604196472e-07,
"loss": 1.2662,
"step": 97600
},
{
"epoch": 2.91,
"learning_rate": 8.94134477825465e-07,
"loss": 1.2251,
"step": 97700
},
{
"epoch": 2.91,
"learning_rate": 8.643299952312829e-07,
"loss": 1.2888,
"step": 97800
},
{
"epoch": 2.92,
"learning_rate": 8.345255126371006e-07,
"loss": 1.2734,
"step": 97900
},
{
"epoch": 2.92,
"learning_rate": 8.047210300429185e-07,
"loss": 1.2751,
"step": 98000
},
{
"epoch": 2.92,
"learning_rate": 7.749165474487363e-07,
"loss": 1.2835,
"step": 98100
},
{
"epoch": 2.93,
"learning_rate": 7.451120648545542e-07,
"loss": 1.2643,
"step": 98200
},
{
"epoch": 2.93,
"learning_rate": 7.15307582260372e-07,
"loss": 1.3029,
"step": 98300
},
{
"epoch": 2.93,
"learning_rate": 6.855030996661899e-07,
"loss": 1.3015,
"step": 98400
},
{
"epoch": 2.94,
"learning_rate": 6.556986170720076e-07,
"loss": 1.2745,
"step": 98500
},
{
"epoch": 2.94,
"learning_rate": 6.261921793037673e-07,
"loss": 1.298,
"step": 98600
},
{
"epoch": 2.94,
"learning_rate": 5.963876967095852e-07,
"loss": 1.2703,
"step": 98700
},
{
"epoch": 2.94,
"learning_rate": 5.66583214115403e-07,
"loss": 1.2758,
"step": 98800
},
{
"epoch": 2.95,
"learning_rate": 5.367787315212208e-07,
"loss": 1.2944,
"step": 98900
},
{
"epoch": 2.95,
"learning_rate": 5.069742489270386e-07,
"loss": 1.3073,
"step": 99000
},
{
"epoch": 2.95,
"learning_rate": 4.771697663328565e-07,
"loss": 1.2741,
"step": 99100
},
{
"epoch": 2.96,
"learning_rate": 4.473652837386743e-07,
"loss": 1.2829,
"step": 99200
},
{
"epoch": 2.96,
"learning_rate": 4.1756080114449216e-07,
"loss": 1.3196,
"step": 99300
},
{
"epoch": 2.96,
"learning_rate": 3.8775631855030996e-07,
"loss": 1.3021,
"step": 99400
},
{
"epoch": 2.97,
"learning_rate": 3.579518359561278e-07,
"loss": 1.2743,
"step": 99500
},
{
"epoch": 2.97,
"learning_rate": 3.2814735336194567e-07,
"loss": 1.2547,
"step": 99600
},
{
"epoch": 2.97,
"learning_rate": 2.9834287076776347e-07,
"loss": 1.2993,
"step": 99700
},
{
"epoch": 2.97,
"learning_rate": 2.685383881735813e-07,
"loss": 1.2546,
"step": 99800
},
{
"epoch": 2.98,
"learning_rate": 2.387339055793992e-07,
"loss": 1.2743,
"step": 99900
},
{
"epoch": 2.98,
"learning_rate": 2.0892942298521698e-07,
"loss": 1.2957,
"step": 100000
},
{
"epoch": 2.98,
"learning_rate": 1.791249403910348e-07,
"loss": 1.2468,
"step": 100100
},
{
"epoch": 2.99,
"learning_rate": 1.4932045779685266e-07,
"loss": 1.2819,
"step": 100200
},
{
"epoch": 2.99,
"learning_rate": 1.1951597520267048e-07,
"loss": 1.2974,
"step": 100300
},
{
"epoch": 2.99,
"learning_rate": 8.971149260848831e-08,
"loss": 1.2679,
"step": 100400
},
{
"epoch": 3.0,
"learning_rate": 5.990701001430615e-08,
"loss": 1.2761,
"step": 100500
},
{
"epoch": 3.0,
"learning_rate": 3.010252742012399e-08,
"loss": 1.3245,
"step": 100600
},
{
"epoch": 3.0,
"eval_gen_len": 18.9801,
"eval_loss": 1.4905033111572266,
"eval_rouge1": 32.9084,
"eval_rouge2": 17.7027,
"eval_rougeL": 28.2912,
"eval_rougeLsum": 28.2975,
"eval_runtime": 291.9809,
"eval_samples_per_second": 9.288,
"eval_steps_per_second": 1.161,
"step": 100656
},
{
"epoch": 3.0,
"step": 100656,
"total_flos": 9.506404988551581e+17,
"train_loss": 1.5143268576412658,
"train_runtime": 40494.9096,
"train_samples_per_second": 19.885,
"train_steps_per_second": 2.486
}
],
"max_steps": 100656,
"num_train_epochs": 3,
"total_flos": 9.506404988551581e+17,
"trial_name": null,
"trial_params": null
}