MolReactGen-GuacaMol-Molecules / trainer_state.json
hogru's picture
Update tokenizer, bump hf versions
92eb358
{
"best_metric": 1.1810568571090698,
"best_model_checkpoint": "/home/stephan/code/molreactgen/checkpoints/2023-10-31_05-26-52_experiment/checkpoint-124300",
"epoch": 49.98492007640495,
"eval_steps": 500,
"global_step": 124300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 2.011263073209976e-05,
"loss": 4.9407,
"step": 100
},
{
"epoch": 0.08,
"learning_rate": 4.022526146419952e-05,
"loss": 4.4413,
"step": 200
},
{
"epoch": 0.12,
"learning_rate": 6.033789219629928e-05,
"loss": 4.048,
"step": 300
},
{
"epoch": 0.16,
"learning_rate": 8.045052292839905e-05,
"loss": 3.6757,
"step": 400
},
{
"epoch": 0.2,
"learning_rate": 0.0001005631536604988,
"loss": 3.3122,
"step": 500
},
{
"epoch": 0.24,
"learning_rate": 0.00012067578439259856,
"loss": 2.9853,
"step": 600
},
{
"epoch": 0.28,
"learning_rate": 0.0001407884151246983,
"loss": 2.7396,
"step": 700
},
{
"epoch": 0.32,
"learning_rate": 0.0001609010458567981,
"loss": 2.5627,
"step": 800
},
{
"epoch": 0.36,
"learning_rate": 0.00018101367658889783,
"loss": 2.4278,
"step": 900
},
{
"epoch": 0.4,
"learning_rate": 0.0002011263073209976,
"loss": 2.3221,
"step": 1000
},
{
"epoch": 0.44,
"learning_rate": 0.00022123893805309734,
"loss": 2.2408,
"step": 1100
},
{
"epoch": 0.48,
"learning_rate": 0.0002413515687851971,
"loss": 2.1777,
"step": 1200
},
{
"epoch": 0.52,
"learning_rate": 0.0002614641995172969,
"loss": 2.1231,
"step": 1300
},
{
"epoch": 0.56,
"learning_rate": 0.0002815768302493966,
"loss": 2.0787,
"step": 1400
},
{
"epoch": 0.6,
"learning_rate": 0.00030168946098149636,
"loss": 2.039,
"step": 1500
},
{
"epoch": 0.64,
"learning_rate": 0.0003218020917135962,
"loss": 2.0056,
"step": 1600
},
{
"epoch": 0.68,
"learning_rate": 0.0003419147224456959,
"loss": 1.9805,
"step": 1700
},
{
"epoch": 0.72,
"learning_rate": 0.00036202735317779567,
"loss": 1.9495,
"step": 1800
},
{
"epoch": 0.76,
"learning_rate": 0.0003821399839098954,
"loss": 1.927,
"step": 1900
},
{
"epoch": 0.8,
"learning_rate": 0.0004022526146419952,
"loss": 1.9033,
"step": 2000
},
{
"epoch": 0.84,
"learning_rate": 0.00042236524537409497,
"loss": 1.8861,
"step": 2100
},
{
"epoch": 0.88,
"learning_rate": 0.0004424778761061947,
"loss": 1.8659,
"step": 2200
},
{
"epoch": 0.92,
"learning_rate": 0.0004625905068382945,
"loss": 1.849,
"step": 2300
},
{
"epoch": 0.97,
"learning_rate": 0.0004827031375703942,
"loss": 1.8349,
"step": 2400
},
{
"epoch": 1.0,
"eval_accuracy": 0.3811347119515808,
"eval_loss": 1.7538774013519287,
"eval_runtime": 21.4451,
"eval_samples_per_second": 3710.317,
"eval_steps_per_second": 14.502,
"step": 2486
},
{
"epoch": 1.01,
"learning_rate": 0.000502815768302494,
"loss": 1.8161,
"step": 2500
},
{
"epoch": 1.05,
"learning_rate": 0.0005229283990345938,
"loss": 1.8011,
"step": 2600
},
{
"epoch": 1.09,
"learning_rate": 0.0005430410297666935,
"loss": 1.7851,
"step": 2700
},
{
"epoch": 1.13,
"learning_rate": 0.0005631536604987932,
"loss": 1.7762,
"step": 2800
},
{
"epoch": 1.17,
"learning_rate": 0.0005832662912308931,
"loss": 1.7612,
"step": 2900
},
{
"epoch": 1.21,
"learning_rate": 0.0006033789219629927,
"loss": 1.7521,
"step": 3000
},
{
"epoch": 1.25,
"learning_rate": 0.0006234915526950925,
"loss": 1.739,
"step": 3100
},
{
"epoch": 1.29,
"learning_rate": 0.0006436041834271924,
"loss": 1.7296,
"step": 3200
},
{
"epoch": 1.33,
"learning_rate": 0.0006637168141592921,
"loss": 1.7195,
"step": 3300
},
{
"epoch": 1.37,
"learning_rate": 0.0006838294448913918,
"loss": 1.7106,
"step": 3400
},
{
"epoch": 1.41,
"learning_rate": 0.0007039420756234916,
"loss": 1.7033,
"step": 3500
},
{
"epoch": 1.45,
"learning_rate": 0.0007240547063555913,
"loss": 1.6935,
"step": 3600
},
{
"epoch": 1.49,
"learning_rate": 0.0007441673370876911,
"loss": 1.6846,
"step": 3700
},
{
"epoch": 1.53,
"learning_rate": 0.0007642799678197908,
"loss": 1.6774,
"step": 3800
},
{
"epoch": 1.57,
"learning_rate": 0.0007843925985518905,
"loss": 1.6706,
"step": 3900
},
{
"epoch": 1.61,
"learning_rate": 0.0008045052292839904,
"loss": 1.6594,
"step": 4000
},
{
"epoch": 1.65,
"learning_rate": 0.0008246178600160902,
"loss": 1.6572,
"step": 4100
},
{
"epoch": 1.69,
"learning_rate": 0.0008447304907481899,
"loss": 1.649,
"step": 4200
},
{
"epoch": 1.73,
"learning_rate": 0.0008648431214802896,
"loss": 1.6442,
"step": 4300
},
{
"epoch": 1.77,
"learning_rate": 0.0008849557522123894,
"loss": 1.6377,
"step": 4400
},
{
"epoch": 1.81,
"learning_rate": 0.0009050683829444891,
"loss": 1.6332,
"step": 4500
},
{
"epoch": 1.85,
"learning_rate": 0.000925181013676589,
"loss": 1.6288,
"step": 4600
},
{
"epoch": 1.89,
"learning_rate": 0.0009452936444086887,
"loss": 1.6221,
"step": 4700
},
{
"epoch": 1.93,
"learning_rate": 0.0009654062751407884,
"loss": 1.6152,
"step": 4800
},
{
"epoch": 1.97,
"learning_rate": 0.000985518905872888,
"loss": 1.6122,
"step": 4900
},
{
"epoch": 2.0,
"eval_accuracy": 0.3949071464783068,
"eval_loss": 1.5559048652648926,
"eval_runtime": 17.9618,
"eval_samples_per_second": 4429.835,
"eval_steps_per_second": 17.314,
"step": 4973
},
{
"epoch": 2.01,
"learning_rate": 0.001005631536604988,
"loss": 1.6094,
"step": 5000
},
{
"epoch": 2.05,
"learning_rate": 0.0010257441673370879,
"loss": 1.6001,
"step": 5100
},
{
"epoch": 2.09,
"learning_rate": 0.0010458567980691875,
"loss": 1.5933,
"step": 5200
},
{
"epoch": 2.13,
"learning_rate": 0.0010659694288012872,
"loss": 1.5933,
"step": 5300
},
{
"epoch": 2.17,
"learning_rate": 0.001086082059533387,
"loss": 1.5913,
"step": 5400
},
{
"epoch": 2.21,
"learning_rate": 0.0011061946902654867,
"loss": 1.5864,
"step": 5500
},
{
"epoch": 2.25,
"learning_rate": 0.0011263073209975864,
"loss": 1.5847,
"step": 5600
},
{
"epoch": 2.29,
"learning_rate": 0.0011464199517296862,
"loss": 1.5822,
"step": 5700
},
{
"epoch": 2.33,
"learning_rate": 0.0011665325824617861,
"loss": 1.5794,
"step": 5800
},
{
"epoch": 2.37,
"learning_rate": 0.0011864440868865648,
"loss": 1.5771,
"step": 5900
},
{
"epoch": 2.41,
"learning_rate": 0.0012065567176186645,
"loss": 1.5719,
"step": 6000
},
{
"epoch": 2.45,
"learning_rate": 0.0012266693483507644,
"loss": 1.5731,
"step": 6100
},
{
"epoch": 2.49,
"learning_rate": 0.001246781979082864,
"loss": 1.5661,
"step": 6200
},
{
"epoch": 2.53,
"learning_rate": 0.001266894609814964,
"loss": 1.5704,
"step": 6300
},
{
"epoch": 2.57,
"learning_rate": 0.0012870072405470638,
"loss": 1.5659,
"step": 6400
},
{
"epoch": 2.61,
"learning_rate": 0.0013071198712791632,
"loss": 1.5616,
"step": 6500
},
{
"epoch": 2.65,
"learning_rate": 0.001327232502011263,
"loss": 1.5614,
"step": 6600
},
{
"epoch": 2.69,
"learning_rate": 0.0013473451327433628,
"loss": 1.5582,
"step": 6700
},
{
"epoch": 2.73,
"learning_rate": 0.0013674577634754626,
"loss": 1.5545,
"step": 6800
},
{
"epoch": 2.77,
"learning_rate": 0.0013875703942075623,
"loss": 1.5547,
"step": 6900
},
{
"epoch": 2.81,
"learning_rate": 0.0014076830249396622,
"loss": 1.5548,
"step": 7000
},
{
"epoch": 2.86,
"learning_rate": 0.001427795655671762,
"loss": 1.55,
"step": 7100
},
{
"epoch": 2.9,
"learning_rate": 0.0014479082864038617,
"loss": 1.5533,
"step": 7200
},
{
"epoch": 2.94,
"learning_rate": 0.0014680209171359616,
"loss": 1.5483,
"step": 7300
},
{
"epoch": 2.98,
"learning_rate": 0.001488133547868061,
"loss": 1.5457,
"step": 7400
},
{
"epoch": 3.0,
"eval_accuracy": 0.39967479317894095,
"eval_loss": 1.4914867877960205,
"eval_runtime": 18.1883,
"eval_samples_per_second": 4374.679,
"eval_steps_per_second": 17.099,
"step": 7460
},
{
"epoch": 3.02,
"learning_rate": 0.001508246178600161,
"loss": 1.5441,
"step": 7500
},
{
"epoch": 3.06,
"learning_rate": 0.0015283588093322606,
"loss": 1.5386,
"step": 7600
},
{
"epoch": 3.1,
"learning_rate": 0.0015484714400643604,
"loss": 1.5364,
"step": 7700
},
{
"epoch": 3.14,
"learning_rate": 0.0015685840707964603,
"loss": 1.536,
"step": 7800
},
{
"epoch": 3.18,
"learning_rate": 0.00158869670152856,
"loss": 1.5351,
"step": 7900
},
{
"epoch": 3.22,
"learning_rate": 0.0016088093322606598,
"loss": 1.5372,
"step": 8000
},
{
"epoch": 3.26,
"learning_rate": 0.0016289219629927595,
"loss": 1.5407,
"step": 8100
},
{
"epoch": 3.3,
"learning_rate": 0.0016490345937248594,
"loss": 1.5338,
"step": 8200
},
{
"epoch": 3.34,
"learning_rate": 0.0016691472244569588,
"loss": 1.534,
"step": 8300
},
{
"epoch": 3.38,
"learning_rate": 0.0016892598551890587,
"loss": 1.5331,
"step": 8400
},
{
"epoch": 3.42,
"learning_rate": 0.0017093724859211584,
"loss": 1.534,
"step": 8500
},
{
"epoch": 3.46,
"learning_rate": 0.0017294851166532582,
"loss": 1.5347,
"step": 8600
},
{
"epoch": 3.5,
"learning_rate": 0.0017495977473853581,
"loss": 1.5271,
"step": 8700
},
{
"epoch": 3.54,
"learning_rate": 0.0017697103781174578,
"loss": 1.5301,
"step": 8800
},
{
"epoch": 3.58,
"learning_rate": 0.0017898230088495577,
"loss": 1.5301,
"step": 8900
},
{
"epoch": 3.62,
"learning_rate": 0.0018099356395816573,
"loss": 1.5266,
"step": 9000
},
{
"epoch": 3.66,
"learning_rate": 0.0018300482703137572,
"loss": 1.5283,
"step": 9100
},
{
"epoch": 3.7,
"learning_rate": 0.0018501609010458566,
"loss": 1.5276,
"step": 9200
},
{
"epoch": 3.74,
"learning_rate": 0.0018702735317779565,
"loss": 1.5259,
"step": 9300
},
{
"epoch": 3.78,
"learning_rate": 0.0018903861625100564,
"loss": 1.5223,
"step": 9400
},
{
"epoch": 3.82,
"learning_rate": 0.001910498793242156,
"loss": 1.5289,
"step": 9500
},
{
"epoch": 3.86,
"learning_rate": 0.001930611423974256,
"loss": 1.5279,
"step": 9600
},
{
"epoch": 3.9,
"learning_rate": 0.0019507240547063556,
"loss": 1.5226,
"step": 9700
},
{
"epoch": 3.94,
"learning_rate": 0.0019708366854384552,
"loss": 1.5237,
"step": 9800
},
{
"epoch": 3.98,
"learning_rate": 0.001990949316170555,
"loss": 1.5245,
"step": 9900
},
{
"epoch": 4.0,
"eval_accuracy": 0.40115904050888884,
"eval_loss": 1.47209894657135,
"eval_runtime": 18.1424,
"eval_samples_per_second": 4385.747,
"eval_steps_per_second": 17.142,
"step": 9947
},
{
"epoch": 4.02,
"learning_rate": 0.002011061946902655,
"loss": 1.5211,
"step": 10000
},
{
"epoch": 4.06,
"learning_rate": 0.0020311745776347544,
"loss": 1.5108,
"step": 10100
},
{
"epoch": 4.1,
"learning_rate": 0.0020512872083668543,
"loss": 1.5157,
"step": 10200
},
{
"epoch": 4.14,
"learning_rate": 0.002071399839098954,
"loss": 1.5155,
"step": 10300
},
{
"epoch": 4.18,
"learning_rate": 0.002091512469831054,
"loss": 1.5171,
"step": 10400
},
{
"epoch": 4.22,
"learning_rate": 0.0021116251005631535,
"loss": 1.5195,
"step": 10500
},
{
"epoch": 4.26,
"learning_rate": 0.0021317377312952534,
"loss": 1.5162,
"step": 10600
},
{
"epoch": 4.3,
"learning_rate": 0.0021518503620273533,
"loss": 1.5174,
"step": 10700
},
{
"epoch": 4.34,
"learning_rate": 0.002171962992759453,
"loss": 1.5163,
"step": 10800
},
{
"epoch": 4.38,
"learning_rate": 0.002192075623491553,
"loss": 1.5158,
"step": 10900
},
{
"epoch": 4.42,
"learning_rate": 0.0022121882542236525,
"loss": 1.516,
"step": 11000
},
{
"epoch": 4.46,
"learning_rate": 0.0022323008849557523,
"loss": 1.5143,
"step": 11100
},
{
"epoch": 4.5,
"learning_rate": 0.0022524135156878518,
"loss": 1.5163,
"step": 11200
},
{
"epoch": 4.54,
"learning_rate": 0.0022725261464199517,
"loss": 1.5128,
"step": 11300
},
{
"epoch": 4.58,
"learning_rate": 0.0022926387771520515,
"loss": 1.5193,
"step": 11400
},
{
"epoch": 4.62,
"learning_rate": 0.0023127514078841514,
"loss": 1.5113,
"step": 11500
},
{
"epoch": 4.66,
"learning_rate": 0.0023328640386162513,
"loss": 1.5149,
"step": 11600
},
{
"epoch": 4.7,
"learning_rate": 0.0023529766693483507,
"loss": 1.5127,
"step": 11700
},
{
"epoch": 4.75,
"learning_rate": 0.0023730893000804506,
"loss": 1.5126,
"step": 11800
},
{
"epoch": 4.79,
"learning_rate": 0.00239320193081255,
"loss": 1.5118,
"step": 11900
},
{
"epoch": 4.83,
"learning_rate": 0.00241331456154465,
"loss": 1.5127,
"step": 12000
},
{
"epoch": 4.87,
"learning_rate": 0.00243342719227675,
"loss": 1.5136,
"step": 12100
},
{
"epoch": 4.91,
"learning_rate": 0.0024535398230088497,
"loss": 1.513,
"step": 12200
},
{
"epoch": 4.95,
"learning_rate": 0.0024736524537409496,
"loss": 1.5142,
"step": 12300
},
{
"epoch": 4.99,
"learning_rate": 0.002493765084473049,
"loss": 1.5146,
"step": 12400
},
{
"epoch": 5.0,
"eval_accuracy": 0.4013220687487078,
"eval_loss": 1.4687080383300781,
"eval_runtime": 18.9065,
"eval_samples_per_second": 4208.5,
"eval_steps_per_second": 16.449,
"step": 12433
},
{
"epoch": 5.03,
"learning_rate": 0.0024999976533380463,
"loss": 1.5061,
"step": 12500
},
{
"epoch": 5.07,
"learning_rate": 0.0024999859225147613,
"loss": 1.502,
"step": 12600
},
{
"epoch": 5.11,
"learning_rate": 0.0024999643339526922,
"loss": 1.5045,
"step": 12700
},
{
"epoch": 5.15,
"learning_rate": 0.002499932887822093,
"loss": 1.5039,
"step": 12800
},
{
"epoch": 5.19,
"learning_rate": 0.0024998915843709568,
"loss": 1.5011,
"step": 12900
},
{
"epoch": 5.23,
"learning_rate": 0.0024998404239250133,
"loss": 1.5024,
"step": 13000
},
{
"epoch": 5.27,
"learning_rate": 0.002499779406887729,
"loss": 1.5024,
"step": 13100
},
{
"epoch": 5.31,
"learning_rate": 0.0024997085337403013,
"loss": 1.5007,
"step": 13200
},
{
"epoch": 5.35,
"learning_rate": 0.0024996278050416552,
"loss": 1.5031,
"step": 13300
},
{
"epoch": 5.39,
"learning_rate": 0.0024995372214284403,
"loss": 1.4963,
"step": 13400
},
{
"epoch": 5.43,
"learning_rate": 0.002499436783615024,
"loss": 1.4998,
"step": 13500
},
{
"epoch": 5.47,
"learning_rate": 0.0024993264923934867,
"loss": 1.5001,
"step": 13600
},
{
"epoch": 5.51,
"learning_rate": 0.0024992063486336162,
"loss": 1.4964,
"step": 13700
},
{
"epoch": 5.55,
"learning_rate": 0.0024990763532829,
"loss": 1.4961,
"step": 13800
},
{
"epoch": 5.59,
"learning_rate": 0.0024989365073665175,
"loss": 1.4939,
"step": 13900
},
{
"epoch": 5.63,
"learning_rate": 0.002498786811987333,
"loss": 1.4958,
"step": 14000
},
{
"epoch": 5.67,
"learning_rate": 0.002498627268325886,
"loss": 1.4941,
"step": 14100
},
{
"epoch": 5.71,
"learning_rate": 0.0024984578776403826,
"loss": 1.4901,
"step": 14200
},
{
"epoch": 5.75,
"learning_rate": 0.0024982786412666848,
"loss": 1.4942,
"step": 14300
},
{
"epoch": 5.79,
"learning_rate": 0.002498089560618301,
"loss": 1.4931,
"step": 14400
},
{
"epoch": 5.83,
"learning_rate": 0.002497890637186374,
"loss": 1.4936,
"step": 14500
},
{
"epoch": 5.87,
"learning_rate": 0.00249768187253967,
"loss": 1.4899,
"step": 14600
},
{
"epoch": 5.91,
"learning_rate": 0.0024974632683245654,
"loss": 1.4883,
"step": 14700
},
{
"epoch": 5.95,
"learning_rate": 0.002497234826265034,
"loss": 1.4887,
"step": 14800
},
{
"epoch": 5.99,
"learning_rate": 0.0024969965481626334,
"loss": 1.4858,
"step": 14900
},
{
"epoch": 6.0,
"eval_accuracy": 0.40323114536808624,
"eval_loss": 1.439610242843628,
"eval_runtime": 20.1557,
"eval_samples_per_second": 3947.668,
"eval_steps_per_second": 15.43,
"step": 14920
},
{
"epoch": 6.03,
"learning_rate": 0.002496748435896492,
"loss": 1.4802,
"step": 15000
},
{
"epoch": 6.07,
"learning_rate": 0.0024964904914232923,
"loss": 1.4769,
"step": 15100
},
{
"epoch": 6.11,
"learning_rate": 0.002496222716777257,
"loss": 1.479,
"step": 15200
},
{
"epoch": 6.15,
"learning_rate": 0.0024959479387389807,
"loss": 1.4803,
"step": 15300
},
{
"epoch": 6.19,
"learning_rate": 0.002495660608407645,
"loss": 1.4815,
"step": 15400
},
{
"epoch": 6.23,
"learning_rate": 0.0024953634544481627,
"loss": 1.4808,
"step": 15500
},
{
"epoch": 6.27,
"learning_rate": 0.0024950564792039736,
"loss": 1.4785,
"step": 15600
},
{
"epoch": 6.31,
"learning_rate": 0.00249473968509597,
"loss": 1.4752,
"step": 15700
},
{
"epoch": 6.35,
"learning_rate": 0.002494413074622479,
"loss": 1.4801,
"step": 15800
},
{
"epoch": 6.39,
"learning_rate": 0.002494076650359243,
"loss": 1.4795,
"step": 15900
},
{
"epoch": 6.43,
"learning_rate": 0.0024937304149593967,
"loss": 1.4828,
"step": 16000
},
{
"epoch": 6.47,
"learning_rate": 0.00249337437115345,
"loss": 1.4783,
"step": 16100
},
{
"epoch": 6.51,
"learning_rate": 0.0024930085217492637,
"loss": 1.4761,
"step": 16200
},
{
"epoch": 6.55,
"learning_rate": 0.002492632869632029,
"loss": 1.4757,
"step": 16300
},
{
"epoch": 6.59,
"learning_rate": 0.0024922474177642433,
"loss": 1.4782,
"step": 16400
},
{
"epoch": 6.64,
"learning_rate": 0.0024918521691856877,
"loss": 1.4783,
"step": 16500
},
{
"epoch": 6.68,
"learning_rate": 0.0024914471270134036,
"loss": 1.4739,
"step": 16600
},
{
"epoch": 6.72,
"learning_rate": 0.0024910322944416666,
"loss": 1.4727,
"step": 16700
},
{
"epoch": 6.76,
"learning_rate": 0.0024906076747419625,
"loss": 1.4728,
"step": 16800
},
{
"epoch": 6.8,
"learning_rate": 0.002490173271262961,
"loss": 1.4753,
"step": 16900
},
{
"epoch": 6.84,
"learning_rate": 0.0024897290874304895,
"loss": 1.4742,
"step": 17000
},
{
"epoch": 6.88,
"learning_rate": 0.0024892751267475066,
"loss": 1.4719,
"step": 17100
},
{
"epoch": 6.92,
"learning_rate": 0.002488811392794072,
"loss": 1.4748,
"step": 17200
},
{
"epoch": 6.96,
"learning_rate": 0.002488337889227323,
"loss": 1.4709,
"step": 17300
},
{
"epoch": 7.0,
"learning_rate": 0.00248785461978144,
"loss": 1.4754,
"step": 17400
},
{
"epoch": 7.0,
"eval_accuracy": 0.40480212867795956,
"eval_loss": 1.4229631423950195,
"eval_runtime": 17.9608,
"eval_samples_per_second": 4430.085,
"eval_steps_per_second": 17.315,
"step": 17407
},
{
"epoch": 7.04,
"learning_rate": 0.0024873615882676217,
"loss": 1.4627,
"step": 17500
},
{
"epoch": 7.08,
"learning_rate": 0.002486858798574052,
"loss": 1.4651,
"step": 17600
},
{
"epoch": 7.12,
"learning_rate": 0.002486346254665872,
"loss": 1.4677,
"step": 17700
},
{
"epoch": 7.16,
"learning_rate": 0.002485823960585146,
"loss": 1.4716,
"step": 17800
},
{
"epoch": 7.2,
"learning_rate": 0.0024852919204508307,
"loss": 1.4668,
"step": 17900
},
{
"epoch": 7.24,
"learning_rate": 0.0024847501384587444,
"loss": 1.4655,
"step": 18000
},
{
"epoch": 7.28,
"learning_rate": 0.0024841986188815315,
"loss": 1.4662,
"step": 18100
},
{
"epoch": 7.32,
"learning_rate": 0.0024836373660686284,
"loss": 1.4645,
"step": 18200
},
{
"epoch": 7.36,
"learning_rate": 0.0024830663844462334,
"loss": 1.4642,
"step": 18300
},
{
"epoch": 7.4,
"learning_rate": 0.0024824856785172667,
"loss": 1.4639,
"step": 18400
},
{
"epoch": 7.44,
"learning_rate": 0.0024818952528613374,
"loss": 1.4637,
"step": 18500
},
{
"epoch": 7.48,
"learning_rate": 0.0024812951121347083,
"loss": 1.4662,
"step": 18600
},
{
"epoch": 7.52,
"learning_rate": 0.0024806852610702564,
"loss": 1.4635,
"step": 18700
},
{
"epoch": 7.56,
"learning_rate": 0.0024800657044774382,
"loss": 1.4648,
"step": 18800
},
{
"epoch": 7.6,
"learning_rate": 0.0024794364472422504,
"loss": 1.4616,
"step": 18900
},
{
"epoch": 7.64,
"learning_rate": 0.002478797494327191,
"loss": 1.4658,
"step": 19000
},
{
"epoch": 7.68,
"learning_rate": 0.0024781488507712225,
"loss": 1.4603,
"step": 19100
},
{
"epoch": 7.72,
"learning_rate": 0.0024774905216897293,
"loss": 1.4614,
"step": 19200
},
{
"epoch": 7.76,
"learning_rate": 0.002476829240269048,
"loss": 1.4645,
"step": 19300
},
{
"epoch": 7.8,
"learning_rate": 0.002476158476264398,
"loss": 1.4623,
"step": 19400
},
{
"epoch": 7.84,
"learning_rate": 0.0024754713154038154,
"loss": 1.4618,
"step": 19500
},
{
"epoch": 7.88,
"learning_rate": 0.0024747744901335014,
"loss": 1.4615,
"step": 19600
},
{
"epoch": 7.92,
"learning_rate": 0.0024740680059488146,
"loss": 1.4611,
"step": 19700
},
{
"epoch": 7.96,
"learning_rate": 0.002473351868421287,
"loss": 1.4627,
"step": 19800
},
{
"epoch": 8.0,
"eval_accuracy": 0.40544716114907825,
"eval_loss": 1.414832353591919,
"eval_runtime": 18.0495,
"eval_samples_per_second": 4408.317,
"eval_steps_per_second": 17.23,
"step": 19894
},
{
"epoch": 8.0,
"learning_rate": 0.00247262608319858,
"loss": 1.463,
"step": 19900
},
{
"epoch": 8.04,
"learning_rate": 0.0024718906560044383,
"loss": 1.454,
"step": 20000
},
{
"epoch": 8.08,
"learning_rate": 0.0024711455926386466,
"loss": 1.4563,
"step": 20100
},
{
"epoch": 8.12,
"learning_rate": 0.0024703908989769843,
"loss": 1.4566,
"step": 20200
},
{
"epoch": 8.16,
"learning_rate": 0.0024696265809711748,
"loss": 1.4524,
"step": 20300
},
{
"epoch": 8.2,
"learning_rate": 0.0024688526446488453,
"loss": 1.4551,
"step": 20400
},
{
"epoch": 8.24,
"learning_rate": 0.0024680690961134738,
"loss": 1.4555,
"step": 20500
},
{
"epoch": 8.28,
"learning_rate": 0.0024672759415443435,
"loss": 1.4587,
"step": 20600
},
{
"epoch": 8.32,
"learning_rate": 0.002466473187196493,
"loss": 1.4553,
"step": 20700
},
{
"epoch": 8.36,
"learning_rate": 0.0024656608394006676,
"loss": 1.4581,
"step": 20800
},
{
"epoch": 8.4,
"learning_rate": 0.00246483890456327,
"loss": 1.4556,
"step": 20900
},
{
"epoch": 8.44,
"learning_rate": 0.002464007389166307,
"loss": 1.4559,
"step": 21000
},
{
"epoch": 8.48,
"learning_rate": 0.0024631662997673435,
"loss": 1.455,
"step": 21100
},
{
"epoch": 8.53,
"learning_rate": 0.0024623156429994446,
"loss": 1.4568,
"step": 21200
},
{
"epoch": 8.57,
"learning_rate": 0.002461455425571128,
"loss": 1.4531,
"step": 21300
},
{
"epoch": 8.61,
"learning_rate": 0.0024605856542663095,
"loss": 1.4568,
"step": 21400
},
{
"epoch": 8.65,
"learning_rate": 0.002459715176362497,
"loss": 1.455,
"step": 21500
},
{
"epoch": 8.69,
"learning_rate": 0.0024588264133239973,
"loss": 1.4526,
"step": 21600
},
{
"epoch": 8.73,
"learning_rate": 0.002457928117142122,
"loss": 1.4506,
"step": 21700
},
{
"epoch": 8.77,
"learning_rate": 0.0024570202949010856,
"loss": 1.4506,
"step": 21800
},
{
"epoch": 8.81,
"learning_rate": 0.0024561029537602283,
"loss": 1.4561,
"step": 21900
},
{
"epoch": 8.85,
"learning_rate": 0.0024551761009539595,
"loss": 1.4556,
"step": 22000
},
{
"epoch": 8.89,
"learning_rate": 0.0024542397437916992,
"loss": 1.4535,
"step": 22100
},
{
"epoch": 8.93,
"learning_rate": 0.0024532938896578227,
"loss": 1.4519,
"step": 22200
},
{
"epoch": 8.97,
"learning_rate": 0.0024523385460115997,
"loss": 1.454,
"step": 22300
},
{
"epoch": 9.0,
"eval_accuracy": 0.4060033334938244,
"eval_loss": 1.4071589708328247,
"eval_runtime": 18.1007,
"eval_samples_per_second": 4395.863,
"eval_steps_per_second": 17.182,
"step": 22380
},
{
"epoch": 9.01,
"learning_rate": 0.002451373720387138,
"loss": 1.4531,
"step": 22400
},
{
"epoch": 9.05,
"learning_rate": 0.0024503994203933224,
"loss": 1.4438,
"step": 22500
},
{
"epoch": 9.09,
"learning_rate": 0.0024494156537137554,
"loss": 1.4447,
"step": 22600
},
{
"epoch": 9.13,
"learning_rate": 0.002448422428106696,
"loss": 1.4519,
"step": 22700
},
{
"epoch": 9.17,
"learning_rate": 0.002447419751404999,
"loss": 1.446,
"step": 22800
},
{
"epoch": 9.21,
"learning_rate": 0.0024464076315160546,
"loss": 1.4473,
"step": 22900
},
{
"epoch": 9.25,
"learning_rate": 0.002445386076421723,
"loss": 1.4464,
"step": 23000
},
{
"epoch": 9.29,
"learning_rate": 0.002444355094178273,
"loss": 1.4468,
"step": 23100
},
{
"epoch": 9.33,
"learning_rate": 0.002443314692916319,
"loss": 1.4458,
"step": 23200
},
{
"epoch": 9.37,
"learning_rate": 0.002442264880840757,
"loss": 1.4466,
"step": 23300
},
{
"epoch": 9.41,
"learning_rate": 0.002441205666230699,
"loss": 1.4503,
"step": 23400
},
{
"epoch": 9.45,
"learning_rate": 0.002440137057439408,
"loss": 1.4497,
"step": 23500
},
{
"epoch": 9.49,
"learning_rate": 0.002439069889271312,
"loss": 1.4424,
"step": 23600
},
{
"epoch": 9.53,
"learning_rate": 0.0024379826112038167,
"loss": 1.4455,
"step": 23700
},
{
"epoch": 9.57,
"learning_rate": 0.002436885964373002,
"loss": 1.4484,
"step": 23800
},
{
"epoch": 9.61,
"learning_rate": 0.0024357799574273318,
"loss": 1.4497,
"step": 23900
},
{
"epoch": 9.65,
"learning_rate": 0.002434664599089086,
"loss": 1.4468,
"step": 24000
},
{
"epoch": 9.69,
"learning_rate": 0.0024335398981542924,
"loss": 1.4449,
"step": 24100
},
{
"epoch": 9.73,
"learning_rate": 0.0024324058634926583,
"loss": 1.4449,
"step": 24200
},
{
"epoch": 9.77,
"learning_rate": 0.0024312625040474973,
"loss": 1.4472,
"step": 24300
},
{
"epoch": 9.81,
"learning_rate": 0.002430109828835662,
"loss": 1.449,
"step": 24400
},
{
"epoch": 9.85,
"learning_rate": 0.0024289478469474725,
"loss": 1.4505,
"step": 24500
},
{
"epoch": 9.89,
"learning_rate": 0.002427776567546643,
"loss": 1.4471,
"step": 24600
},
{
"epoch": 9.93,
"learning_rate": 0.0024265959998702098,
"loss": 1.4465,
"step": 24700
},
{
"epoch": 9.97,
"learning_rate": 0.0024254061532284605,
"loss": 1.4466,
"step": 24800
},
{
"epoch": 10.0,
"eval_accuracy": 0.406620929073334,
"eval_loss": 1.3988122940063477,
"eval_runtime": 18.1787,
"eval_samples_per_second": 4376.995,
"eval_steps_per_second": 17.108,
"step": 24867
},
{
"epoch": 10.01,
"learning_rate": 0.002424207037004859,
"loss": 1.4474,
"step": 24900
},
{
"epoch": 10.05,
"learning_rate": 0.002422998660655971,
"loss": 1.4368,
"step": 25000
},
{
"epoch": 10.09,
"learning_rate": 0.0024217810337113905,
"loss": 1.4385,
"step": 25100
},
{
"epoch": 10.13,
"learning_rate": 0.0024205541657736644,
"loss": 1.4393,
"step": 25200
},
{
"epoch": 10.17,
"learning_rate": 0.002419318066518217,
"loss": 1.4421,
"step": 25300
},
{
"epoch": 10.21,
"learning_rate": 0.002418072745693272,
"loss": 1.4386,
"step": 25400
},
{
"epoch": 10.25,
"learning_rate": 0.002416818213119779,
"loss": 1.4415,
"step": 25500
},
{
"epoch": 10.29,
"learning_rate": 0.002415554478691332,
"loss": 1.4401,
"step": 25600
},
{
"epoch": 10.33,
"learning_rate": 0.002414281552374095,
"loss": 1.4384,
"step": 25700
},
{
"epoch": 10.37,
"learning_rate": 0.002412999444206721,
"loss": 1.441,
"step": 25800
},
{
"epoch": 10.42,
"learning_rate": 0.0024117081643002737,
"loss": 1.4446,
"step": 25900
},
{
"epoch": 10.46,
"learning_rate": 0.002410407722838148,
"loss": 1.4396,
"step": 26000
},
{
"epoch": 10.5,
"learning_rate": 0.002409098130075989,
"loss": 1.4394,
"step": 26100
},
{
"epoch": 10.54,
"learning_rate": 0.0024077793963416115,
"loss": 1.4417,
"step": 26200
},
{
"epoch": 10.58,
"learning_rate": 0.002406451532034919,
"loss": 1.4395,
"step": 26300
},
{
"epoch": 10.62,
"learning_rate": 0.0024051145476278214,
"loss": 1.4405,
"step": 26400
},
{
"epoch": 10.66,
"learning_rate": 0.0024037684536641515,
"loss": 1.4384,
"step": 26500
},
{
"epoch": 10.7,
"learning_rate": 0.0024024132607595823,
"loss": 1.4413,
"step": 26600
},
{
"epoch": 10.74,
"learning_rate": 0.0024010489796015455,
"loss": 1.4423,
"step": 26700
},
{
"epoch": 10.78,
"learning_rate": 0.0023996756209491432,
"loss": 1.4449,
"step": 26800
},
{
"epoch": 10.82,
"learning_rate": 0.002398293195633067,
"loss": 1.4401,
"step": 26900
},
{
"epoch": 10.86,
"learning_rate": 0.002396901714555509,
"loss": 1.4412,
"step": 27000
},
{
"epoch": 10.9,
"learning_rate": 0.0023955011886900783,
"loss": 1.4428,
"step": 27100
},
{
"epoch": 10.94,
"learning_rate": 0.0023940916290817143,
"loss": 1.4398,
"step": 27200
},
{
"epoch": 10.98,
"learning_rate": 0.0023926730468465983,
"loss": 1.4389,
"step": 27300
},
{
"epoch": 11.0,
"eval_accuracy": 0.4070477054970078,
"eval_loss": 1.3937705755233765,
"eval_runtime": 18.2285,
"eval_samples_per_second": 4365.035,
"eval_steps_per_second": 17.061,
"step": 27354
},
{
"epoch": 11.02,
"learning_rate": 0.002391245453172067,
"loss": 1.4391,
"step": 27400
},
{
"epoch": 11.06,
"learning_rate": 0.0023898232697688333,
"loss": 1.4313,
"step": 27500
},
{
"epoch": 11.1,
"learning_rate": 0.0023883922762806178,
"loss": 1.4304,
"step": 27600
},
{
"epoch": 11.14,
"learning_rate": 0.002386937895558928,
"loss": 1.4324,
"step": 27700
},
{
"epoch": 11.18,
"learning_rate": 0.0023854745486268304,
"loss": 1.4352,
"step": 27800
},
{
"epoch": 11.22,
"learning_rate": 0.002384002247024689,
"loss": 1.4361,
"step": 27900
},
{
"epoch": 11.26,
"learning_rate": 0.0023825210023634864,
"loss": 1.4335,
"step": 28000
},
{
"epoch": 11.3,
"learning_rate": 0.0023810308263247314,
"loss": 1.4349,
"step": 28100
},
{
"epoch": 11.34,
"learning_rate": 0.00237953173066037,
"loss": 1.4324,
"step": 28200
},
{
"epoch": 11.38,
"learning_rate": 0.002378023727192691,
"loss": 1.4355,
"step": 28300
},
{
"epoch": 11.42,
"learning_rate": 0.00237650682781423,
"loss": 1.4348,
"step": 28400
},
{
"epoch": 11.46,
"learning_rate": 0.002374981044487681,
"loss": 1.4363,
"step": 28500
},
{
"epoch": 11.5,
"learning_rate": 0.0023734463892457975,
"loss": 1.4323,
"step": 28600
},
{
"epoch": 11.54,
"learning_rate": 0.0023719028741913013,
"loss": 1.436,
"step": 28700
},
{
"epoch": 11.58,
"learning_rate": 0.0023703505114967835,
"loss": 1.437,
"step": 28800
},
{
"epoch": 11.62,
"learning_rate": 0.0023687893134046105,
"loss": 1.4345,
"step": 28900
},
{
"epoch": 11.66,
"learning_rate": 0.002367219292226828,
"loss": 1.4345,
"step": 29000
},
{
"epoch": 11.7,
"learning_rate": 0.002365640460345062,
"loss": 1.4343,
"step": 29100
},
{
"epoch": 11.74,
"learning_rate": 0.0023640528302104223,
"loss": 1.4328,
"step": 29200
},
{
"epoch": 11.78,
"learning_rate": 0.002362456414343405,
"loss": 1.4336,
"step": 29300
},
{
"epoch": 11.82,
"learning_rate": 0.0023608512253337913,
"loss": 1.4337,
"step": 29400
},
{
"epoch": 11.86,
"learning_rate": 0.002359237275840552,
"loss": 1.4344,
"step": 29500
},
{
"epoch": 11.9,
"learning_rate": 0.002357614578591744,
"loss": 1.4398,
"step": 29600
},
{
"epoch": 11.94,
"learning_rate": 0.0023559831463844123,
"loss": 1.4358,
"step": 29700
},
{
"epoch": 11.98,
"learning_rate": 0.002354342992084487,
"loss": 1.4345,
"step": 29800
},
{
"epoch": 12.0,
"eval_accuracy": 0.4074224403327263,
"eval_loss": 1.3885866403579712,
"eval_runtime": 18.0852,
"eval_samples_per_second": 4399.614,
"eval_steps_per_second": 17.196,
"step": 29841
},
{
"epoch": 12.02,
"learning_rate": 0.002352694128626685,
"loss": 1.4304,
"step": 29900
},
{
"epoch": 12.06,
"learning_rate": 0.002351036569014404,
"loss": 1.4256,
"step": 30000
},
{
"epoch": 12.1,
"learning_rate": 0.0023493703263196236,
"loss": 1.4274,
"step": 30100
},
{
"epoch": 12.14,
"learning_rate": 0.0023476954136827997,
"loss": 1.4275,
"step": 30200
},
{
"epoch": 12.18,
"learning_rate": 0.002346011844312762,
"loss": 1.4272,
"step": 30300
},
{
"epoch": 12.22,
"learning_rate": 0.0023443196314866096,
"loss": 1.4314,
"step": 30400
},
{
"epoch": 12.27,
"learning_rate": 0.002342618788549607,
"loss": 1.4298,
"step": 30500
},
{
"epoch": 12.31,
"learning_rate": 0.0023409264661198655,
"loss": 1.4311,
"step": 30600
},
{
"epoch": 12.35,
"learning_rate": 0.0023392084892342917,
"loss": 1.4278,
"step": 30700
},
{
"epoch": 12.39,
"learning_rate": 0.0023374819225457665,
"loss": 1.432,
"step": 30800
},
{
"epoch": 12.43,
"learning_rate": 0.0023357467796704763,
"loss": 1.4299,
"step": 30900
},
{
"epoch": 12.47,
"learning_rate": 0.002334003074292244,
"loss": 1.43,
"step": 31000
},
{
"epoch": 12.51,
"learning_rate": 0.002332250820162418,
"loss": 1.431,
"step": 31100
},
{
"epoch": 12.55,
"learning_rate": 0.0023304900310997653,
"loss": 1.4272,
"step": 31200
},
{
"epoch": 12.59,
"learning_rate": 0.0023287207209903606,
"loss": 1.4301,
"step": 31300
},
{
"epoch": 12.63,
"learning_rate": 0.0023269429037874783,
"loss": 1.4328,
"step": 31400
},
{
"epoch": 12.67,
"learning_rate": 0.002325156593511483,
"loss": 1.4304,
"step": 31500
},
{
"epoch": 12.71,
"learning_rate": 0.0023233618042497167,
"loss": 1.4286,
"step": 31600
},
{
"epoch": 12.75,
"learning_rate": 0.0023215585501563905,
"loss": 1.4319,
"step": 31700
},
{
"epoch": 12.79,
"learning_rate": 0.002319746845452471,
"loss": 1.4311,
"step": 31800
},
{
"epoch": 12.83,
"learning_rate": 0.0023179267044255675,
"loss": 1.4293,
"step": 31900
},
{
"epoch": 12.87,
"learning_rate": 0.0023160981414298222,
"loss": 1.4296,
"step": 32000
},
{
"epoch": 12.91,
"learning_rate": 0.0023142611708857944,
"loss": 1.4288,
"step": 32100
},
{
"epoch": 12.95,
"learning_rate": 0.002312415807280348,
"loss": 1.4231,
"step": 32200
},
{
"epoch": 12.99,
"learning_rate": 0.0023105620651665366,
"loss": 1.4286,
"step": 32300
},
{
"epoch": 13.0,
"eval_accuracy": 0.4075583857053441,
"eval_loss": 1.3839157819747925,
"eval_runtime": 18.2394,
"eval_samples_per_second": 4362.423,
"eval_steps_per_second": 17.051,
"step": 32327
},
{
"epoch": 13.03,
"learning_rate": 0.002308699959163489,
"loss": 1.4222,
"step": 32400
},
{
"epoch": 13.07,
"learning_rate": 0.002306829503956295,
"loss": 1.4208,
"step": 32500
},
{
"epoch": 13.11,
"learning_rate": 0.0023049507142958872,
"loss": 1.4234,
"step": 32600
},
{
"epoch": 13.15,
"learning_rate": 0.0023030636049989265,
"loss": 1.4199,
"step": 32700
},
{
"epoch": 13.19,
"learning_rate": 0.002301168190947686,
"loss": 1.4248,
"step": 32800
},
{
"epoch": 13.23,
"learning_rate": 0.00229926448708993,
"loss": 1.4215,
"step": 32900
},
{
"epoch": 13.27,
"learning_rate": 0.002297352508438801,
"loss": 1.4243,
"step": 33000
},
{
"epoch": 13.31,
"learning_rate": 0.0022954322700726964,
"loss": 1.421,
"step": 33100
},
{
"epoch": 13.35,
"learning_rate": 0.002293503787135154,
"loss": 1.4235,
"step": 33200
},
{
"epoch": 13.39,
"learning_rate": 0.0022915670748347304,
"loss": 1.4255,
"step": 33300
},
{
"epoch": 13.43,
"learning_rate": 0.00228962214844488,
"loss": 1.4246,
"step": 33400
},
{
"epoch": 13.47,
"learning_rate": 0.0022876690233038367,
"loss": 1.4283,
"step": 33500
},
{
"epoch": 13.51,
"learning_rate": 0.0022857077148144924,
"loss": 1.4174,
"step": 33600
},
{
"epoch": 13.55,
"learning_rate": 0.0022837382384442747,
"loss": 1.4262,
"step": 33700
},
{
"epoch": 13.59,
"learning_rate": 0.002281780426315188,
"loss": 1.4258,
"step": 33800
},
{
"epoch": 13.63,
"learning_rate": 0.0022797947421331574,
"loss": 1.422,
"step": 33900
},
{
"epoch": 13.67,
"learning_rate": 0.00227780093670161,
"loss": 1.4279,
"step": 34000
},
{
"epoch": 13.71,
"learning_rate": 0.0022757990257442533,
"loss": 1.4264,
"step": 34100
},
{
"epoch": 13.75,
"learning_rate": 0.0022737890250487188,
"loss": 1.4249,
"step": 34200
},
{
"epoch": 13.79,
"learning_rate": 0.0022717709504664323,
"loss": 1.4252,
"step": 34300
},
{
"epoch": 13.83,
"learning_rate": 0.002269744817912497,
"loss": 1.4198,
"step": 34400
},
{
"epoch": 13.87,
"learning_rate": 0.0022677106433655597,
"loss": 1.4259,
"step": 34500
},
{
"epoch": 13.91,
"learning_rate": 0.00226566844286769,
"loss": 1.4243,
"step": 34600
},
{
"epoch": 13.95,
"learning_rate": 0.002263618232524254,
"loss": 1.4211,
"step": 34700
},
{
"epoch": 13.99,
"learning_rate": 0.0022615600285037824,
"loss": 1.4259,
"step": 34800
},
{
"epoch": 14.0,
"eval_accuracy": 0.40785895242761616,
"eval_loss": 1.3789013624191284,
"eval_runtime": 18.1893,
"eval_samples_per_second": 4374.436,
"eval_steps_per_second": 17.098,
"step": 34814
},
{
"epoch": 14.03,
"learning_rate": 0.002259493847037849,
"loss": 1.4158,
"step": 34900
},
{
"epoch": 14.07,
"learning_rate": 0.002257419704420939,
"loss": 1.4142,
"step": 35000
},
{
"epoch": 14.11,
"learning_rate": 0.002255337617010322,
"loss": 1.4167,
"step": 35100
},
{
"epoch": 14.16,
"learning_rate": 0.0022532476012259205,
"loss": 1.4164,
"step": 35200
},
{
"epoch": 14.2,
"learning_rate": 0.0022511496735501853,
"loss": 1.4176,
"step": 35300
},
{
"epoch": 14.24,
"learning_rate": 0.0022490438505279606,
"loss": 1.4169,
"step": 35400
},
{
"epoch": 14.28,
"learning_rate": 0.0022469301487663563,
"loss": 1.4199,
"step": 35500
},
{
"epoch": 14.32,
"learning_rate": 0.002244808584934615,
"loss": 1.4194,
"step": 35600
},
{
"epoch": 14.36,
"learning_rate": 0.0022426791757639846,
"loss": 1.418,
"step": 35700
},
{
"epoch": 14.4,
"learning_rate": 0.00224054193804758,
"loss": 1.4175,
"step": 35800
},
{
"epoch": 14.44,
"learning_rate": 0.0022383968886402566,
"loss": 1.4203,
"step": 35900
},
{
"epoch": 14.48,
"learning_rate": 0.00223626561142873,
"loss": 1.422,
"step": 36000
},
{
"epoch": 14.52,
"learning_rate": 0.0022341050671441437,
"loss": 1.4194,
"step": 36100
},
{
"epoch": 14.56,
"learning_rate": 0.0022319367619316037,
"loss": 1.4161,
"step": 36200
},
{
"epoch": 14.6,
"learning_rate": 0.002229760712890972,
"loss": 1.4183,
"step": 36300
},
{
"epoch": 14.64,
"learning_rate": 0.00222757693718318,
"loss": 1.4212,
"step": 36400
},
{
"epoch": 14.68,
"learning_rate": 0.0022253854520300938,
"loss": 1.42,
"step": 36500
},
{
"epoch": 14.72,
"learning_rate": 0.0022231862747143795,
"loss": 1.4214,
"step": 36600
},
{
"epoch": 14.76,
"learning_rate": 0.0022209794225793644,
"loss": 1.4172,
"step": 36700
},
{
"epoch": 14.8,
"learning_rate": 0.002218764913028901,
"loss": 1.4243,
"step": 36800
},
{
"epoch": 14.84,
"learning_rate": 0.002216542763527233,
"loss": 1.4162,
"step": 36900
},
{
"epoch": 14.88,
"learning_rate": 0.0022143129915988525,
"loss": 1.4195,
"step": 37000
},
{
"epoch": 14.92,
"learning_rate": 0.0022120756148283644,
"loss": 1.4206,
"step": 37100
},
{
"epoch": 14.96,
"learning_rate": 0.002209830650860349,
"loss": 1.4193,
"step": 37200
},
{
"epoch": 15.0,
"learning_rate": 0.0022075781173992192,
"loss": 1.4177,
"step": 37300
},
{
"epoch": 15.0,
"eval_accuracy": 0.40866896027279703,
"eval_loss": 1.3724240064620972,
"eval_runtime": 18.1531,
"eval_samples_per_second": 4383.159,
"eval_steps_per_second": 17.132,
"step": 37301
},
{
"epoch": 15.04,
"learning_rate": 0.0022053180322090856,
"loss": 1.4072,
"step": 37400
},
{
"epoch": 15.08,
"learning_rate": 0.002203050413113611,
"loss": 1.4091,
"step": 37500
},
{
"epoch": 15.12,
"learning_rate": 0.0022007752779958753,
"loss": 1.4126,
"step": 37600
},
{
"epoch": 15.16,
"learning_rate": 0.0021984926447982302,
"loss": 1.4133,
"step": 37700
},
{
"epoch": 15.2,
"learning_rate": 0.00219620253152216,
"loss": 1.4127,
"step": 37800
},
{
"epoch": 15.24,
"learning_rate": 0.002193904956228139,
"loss": 1.4174,
"step": 37900
},
{
"epoch": 15.28,
"learning_rate": 0.0021915999370354894,
"loss": 1.4141,
"step": 38000
},
{
"epoch": 15.32,
"learning_rate": 0.002189287492122236,
"loss": 1.4127,
"step": 38100
},
{
"epoch": 15.36,
"learning_rate": 0.0021869676397249685,
"loss": 1.4144,
"step": 38200
},
{
"epoch": 15.4,
"learning_rate": 0.0021846403981386903,
"loss": 1.4158,
"step": 38300
},
{
"epoch": 15.44,
"learning_rate": 0.002182305785716681,
"loss": 1.4143,
"step": 38400
},
{
"epoch": 15.48,
"learning_rate": 0.002179963820870347,
"loss": 1.4132,
"step": 38500
},
{
"epoch": 15.52,
"learning_rate": 0.0021776145220690785,
"loss": 1.418,
"step": 38600
},
{
"epoch": 15.56,
"learning_rate": 0.0021752579078401038,
"loss": 1.4117,
"step": 38700
},
{
"epoch": 15.6,
"learning_rate": 0.002172893996768341,
"loss": 1.4159,
"step": 38800
},
{
"epoch": 15.64,
"learning_rate": 0.002170522807496255,
"loss": 1.4136,
"step": 38900
},
{
"epoch": 15.68,
"learning_rate": 0.0021681443587237086,
"loss": 1.4158,
"step": 39000
},
{
"epoch": 15.72,
"learning_rate": 0.002165758669207814,
"loss": 1.4137,
"step": 39100
},
{
"epoch": 15.76,
"learning_rate": 0.0021633657577627857,
"loss": 1.4131,
"step": 39200
},
{
"epoch": 15.8,
"learning_rate": 0.0021609656432597935,
"loss": 1.415,
"step": 39300
},
{
"epoch": 15.84,
"learning_rate": 0.002158558344626812,
"loss": 1.4097,
"step": 39400
},
{
"epoch": 15.88,
"learning_rate": 0.002156143880848472,
"loss": 1.4134,
"step": 39500
},
{
"epoch": 15.92,
"learning_rate": 0.0021537465223752917,
"loss": 1.4143,
"step": 39600
},
{
"epoch": 15.96,
"learning_rate": 0.002151317856661352,
"loss": 1.4132,
"step": 39700
},
{
"epoch": 16.0,
"eval_accuracy": 0.40897395230016736,
"eval_loss": 1.3687959909439087,
"eval_runtime": 21.4564,
"eval_samples_per_second": 3708.364,
"eval_steps_per_second": 14.495,
"step": 39788
},
{
"epoch": 16.0,
"learning_rate": 0.0021488820829025676,
"loss": 1.4126,
"step": 39800
},
{
"epoch": 16.05,
"learning_rate": 0.0021464392203081324,
"loss": 1.4066,
"step": 39900
},
{
"epoch": 16.09,
"learning_rate": 0.0021439892881431437,
"loss": 1.404,
"step": 40000
},
{
"epoch": 16.13,
"learning_rate": 0.00214155691038776,
"loss": 1.4058,
"step": 40100
},
{
"epoch": 16.17,
"learning_rate": 0.0021390929673124554,
"loss": 1.407,
"step": 40200
},
{
"epoch": 16.21,
"learning_rate": 0.002136622012601205,
"loss": 1.409,
"step": 40300
},
{
"epoch": 16.25,
"learning_rate": 0.002134144065740649,
"loss": 1.4099,
"step": 40400
},
{
"epoch": 16.29,
"learning_rate": 0.0021316591462725697,
"loss": 1.4068,
"step": 40500
},
{
"epoch": 16.33,
"learning_rate": 0.002129167273793738,
"loss": 1.4062,
"step": 40600
},
{
"epoch": 16.37,
"learning_rate": 0.002126668467955756,
"loss": 1.4082,
"step": 40700
},
{
"epoch": 16.41,
"learning_rate": 0.002124162748464908,
"loss": 1.4089,
"step": 40800
},
{
"epoch": 16.45,
"learning_rate": 0.0021216501350819973,
"loss": 1.4106,
"step": 40900
},
{
"epoch": 16.49,
"learning_rate": 0.0021191306476221975,
"loss": 1.4105,
"step": 41000
},
{
"epoch": 16.53,
"learning_rate": 0.002116604305954891,
"loss": 1.407,
"step": 41100
},
{
"epoch": 16.57,
"learning_rate": 0.0021140711300035153,
"loss": 1.4089,
"step": 41200
},
{
"epoch": 16.61,
"learning_rate": 0.0021115311397454047,
"loss": 1.4097,
"step": 41300
},
{
"epoch": 16.65,
"learning_rate": 0.0021089843552116335,
"loss": 1.4132,
"step": 41400
},
{
"epoch": 16.69,
"learning_rate": 0.0021064307964868572,
"loss": 1.4076,
"step": 41500
},
{
"epoch": 16.73,
"learning_rate": 0.002103870483709154,
"loss": 1.4095,
"step": 41600
},
{
"epoch": 16.77,
"learning_rate": 0.0021013034370698683,
"loss": 1.4111,
"step": 41700
},
{
"epoch": 16.81,
"learning_rate": 0.002098729676813446,
"loss": 1.4066,
"step": 41800
},
{
"epoch": 16.85,
"learning_rate": 0.002096149223237283,
"loss": 1.4072,
"step": 41900
},
{
"epoch": 16.89,
"learning_rate": 0.002093562096691557,
"loss": 1.4083,
"step": 42000
},
{
"epoch": 16.93,
"learning_rate": 0.0020909683175790723,
"loss": 1.4105,
"step": 42100
},
{
"epoch": 16.97,
"learning_rate": 0.002088367906355097,
"loss": 1.4091,
"step": 42200
},
{
"epoch": 17.0,
"eval_accuracy": 0.40902723297355015,
"eval_loss": 1.3661445379257202,
"eval_runtime": 18.1703,
"eval_samples_per_second": 4379.015,
"eval_steps_per_second": 17.116,
"step": 42274
},
{
"epoch": 17.01,
"learning_rate": 0.002085760883527201,
"loss": 1.4069,
"step": 42300
},
{
"epoch": 17.05,
"learning_rate": 0.002083147269655097,
"loss": 1.3983,
"step": 42400
},
{
"epoch": 17.09,
"learning_rate": 0.0020805270853504745,
"loss": 1.4009,
"step": 42500
},
{
"epoch": 17.13,
"learning_rate": 0.0020779003512768402,
"loss": 1.3998,
"step": 42600
},
{
"epoch": 17.17,
"learning_rate": 0.0020752670881493546,
"loss": 1.4026,
"step": 42700
},
{
"epoch": 17.21,
"learning_rate": 0.002072627316734667,
"loss": 1.4042,
"step": 42800
},
{
"epoch": 17.25,
"learning_rate": 0.0020699810578507544,
"loss": 1.4057,
"step": 42900
},
{
"epoch": 17.29,
"learning_rate": 0.002067328332366754,
"loss": 1.404,
"step": 43000
},
{
"epoch": 17.33,
"learning_rate": 0.002064669161202802,
"loss": 1.4059,
"step": 43100
},
{
"epoch": 17.37,
"learning_rate": 0.0020620035653298656,
"loss": 1.4007,
"step": 43200
},
{
"epoch": 17.41,
"learning_rate": 0.0020593315657695807,
"loss": 1.4018,
"step": 43300
},
{
"epoch": 17.45,
"learning_rate": 0.0020566531835940825,
"loss": 1.4044,
"step": 43400
},
{
"epoch": 17.49,
"learning_rate": 0.002053995318782436,
"loss": 1.4034,
"step": 43500
},
{
"epoch": 17.53,
"learning_rate": 0.0020513042980923276,
"loss": 1.4043,
"step": 43600
},
{
"epoch": 17.57,
"learning_rate": 0.0020486069580922853,
"loss": 1.4042,
"step": 43700
},
{
"epoch": 17.61,
"learning_rate": 0.0020459033200542877,
"loss": 1.4043,
"step": 43800
},
{
"epoch": 17.65,
"learning_rate": 0.002043193405299981,
"loss": 1.4043,
"step": 43900
},
{
"epoch": 17.69,
"learning_rate": 0.002040477235200511,
"loss": 1.4028,
"step": 44000
},
{
"epoch": 17.73,
"learning_rate": 0.0020377548311763553,
"loss": 1.4029,
"step": 44100
},
{
"epoch": 17.77,
"learning_rate": 0.0020350262146971543,
"loss": 1.4024,
"step": 44200
},
{
"epoch": 17.81,
"learning_rate": 0.002032291407281541,
"loss": 1.4044,
"step": 44300
},
{
"epoch": 17.85,
"learning_rate": 0.0020295504304969716,
"loss": 1.4015,
"step": 44400
},
{
"epoch": 17.89,
"learning_rate": 0.002026803305959556,
"loss": 1.4073,
"step": 44500
},
{
"epoch": 17.94,
"learning_rate": 0.002024050055333887,
"loss": 1.4031,
"step": 44600
},
{
"epoch": 17.98,
"learning_rate": 0.00202129070033287,
"loss": 1.4017,
"step": 44700
},
{
"epoch": 18.0,
"eval_accuracy": 0.4093614320145688,
"eval_loss": 1.3598405122756958,
"eval_runtime": 18.1233,
"eval_samples_per_second": 4390.377,
"eval_steps_per_second": 17.16,
"step": 44761
},
{
"epoch": 18.02,
"learning_rate": 0.002018525262717551,
"loss": 1.4004,
"step": 44800
},
{
"epoch": 18.06,
"learning_rate": 0.002015753764296944,
"loss": 1.3946,
"step": 44900
},
{
"epoch": 18.1,
"learning_rate": 0.0020129762269278624,
"loss": 1.3968,
"step": 45000
},
{
"epoch": 18.14,
"learning_rate": 0.0020101926725147414,
"loss": 1.395,
"step": 45100
},
{
"epoch": 18.18,
"learning_rate": 0.002007403123009471,
"loss": 1.3964,
"step": 45200
},
{
"epoch": 18.22,
"learning_rate": 0.00200460760041122,
"loss": 1.3958,
"step": 45300
},
{
"epoch": 18.26,
"learning_rate": 0.0020018061267662608,
"loss": 1.3974,
"step": 45400
},
{
"epoch": 18.3,
"learning_rate": 0.0019989987241677987,
"loss": 1.3964,
"step": 45500
},
{
"epoch": 18.34,
"learning_rate": 0.0019961854147557967,
"loss": 1.3991,
"step": 45600
},
{
"epoch": 18.38,
"learning_rate": 0.0019933662207167998,
"loss": 1.3967,
"step": 45700
},
{
"epoch": 18.42,
"learning_rate": 0.001990541164283761,
"loss": 1.3996,
"step": 45800
},
{
"epoch": 18.46,
"learning_rate": 0.001987710267735866,
"loss": 1.4007,
"step": 45900
},
{
"epoch": 18.5,
"learning_rate": 0.0019848735533983574,
"loss": 1.3999,
"step": 46000
},
{
"epoch": 18.54,
"learning_rate": 0.001982031043642358,
"loss": 1.3994,
"step": 46100
},
{
"epoch": 18.58,
"learning_rate": 0.001979182760884695,
"loss": 1.397,
"step": 46200
},
{
"epoch": 18.62,
"learning_rate": 0.001976328727587724,
"loss": 1.3993,
"step": 46300
},
{
"epoch": 18.66,
"learning_rate": 0.00197346896625915,
"loss": 1.3997,
"step": 46400
},
{
"epoch": 18.7,
"learning_rate": 0.0019706034994518506,
"loss": 1.3972,
"step": 46500
},
{
"epoch": 18.74,
"learning_rate": 0.0019677323497636996,
"loss": 1.3987,
"step": 46600
},
{
"epoch": 18.78,
"learning_rate": 0.0019648555398373868,
"loss": 1.3996,
"step": 46700
},
{
"epoch": 18.82,
"learning_rate": 0.0019619730923602394,
"loss": 1.3993,
"step": 46800
},
{
"epoch": 18.86,
"learning_rate": 0.001959085030064046,
"loss": 1.4007,
"step": 46900
},
{
"epoch": 18.9,
"learning_rate": 0.001956191375724874,
"loss": 1.4001,
"step": 47000
},
{
"epoch": 18.94,
"learning_rate": 0.0019532921521628905,
"loss": 1.3984,
"step": 47100
},
{
"epoch": 18.98,
"learning_rate": 0.0019503873822421841,
"loss": 1.4005,
"step": 47200
},
{
"epoch": 19.0,
"eval_accuracy": 0.4099590252150344,
"eval_loss": 1.353163480758667,
"eval_runtime": 18.0162,
"eval_samples_per_second": 4416.478,
"eval_steps_per_second": 17.262,
"step": 47248
},
{
"epoch": 19.02,
"learning_rate": 0.001947477088870584,
"loss": 1.3927,
"step": 47300
},
{
"epoch": 19.06,
"learning_rate": 0.0019445612949994774,
"loss": 1.39,
"step": 47400
},
{
"epoch": 19.1,
"learning_rate": 0.0019416400236236303,
"loss": 1.3912,
"step": 47500
},
{
"epoch": 19.14,
"learning_rate": 0.0019387132977810076,
"loss": 1.3908,
"step": 47600
},
{
"epoch": 19.18,
"learning_rate": 0.0019357811405525877,
"loss": 1.3917,
"step": 47700
},
{
"epoch": 19.22,
"learning_rate": 0.0019328435750621822,
"loss": 1.3978,
"step": 47800
},
{
"epoch": 19.26,
"learning_rate": 0.0019299006244762559,
"loss": 1.3935,
"step": 47900
},
{
"epoch": 19.3,
"learning_rate": 0.0019269523120037401,
"loss": 1.3905,
"step": 48000
},
{
"epoch": 19.34,
"learning_rate": 0.001923998660895852,
"loss": 1.3952,
"step": 48100
},
{
"epoch": 19.38,
"learning_rate": 0.001921039694445911,
"loss": 1.3941,
"step": 48200
},
{
"epoch": 19.42,
"learning_rate": 0.0019180754359891545,
"loss": 1.3909,
"step": 48300
},
{
"epoch": 19.46,
"learning_rate": 0.0019151059089025538,
"loss": 1.3919,
"step": 48400
},
{
"epoch": 19.5,
"learning_rate": 0.0019121311366046307,
"loss": 1.3894,
"step": 48500
},
{
"epoch": 19.54,
"learning_rate": 0.0019091511425552727,
"loss": 1.3954,
"step": 48600
},
{
"epoch": 19.58,
"learning_rate": 0.001906165950255546,
"loss": 1.3925,
"step": 48700
},
{
"epoch": 19.62,
"learning_rate": 0.0019031755832475124,
"loss": 1.3963,
"step": 48800
},
{
"epoch": 19.66,
"learning_rate": 0.0019001800651140435,
"loss": 1.3937,
"step": 48900
},
{
"epoch": 19.7,
"learning_rate": 0.0018971794194786334,
"loss": 1.3949,
"step": 49000
},
{
"epoch": 19.74,
"learning_rate": 0.001894173670005213,
"loss": 1.3935,
"step": 49100
},
{
"epoch": 19.78,
"learning_rate": 0.0018911628403979648,
"loss": 1.3952,
"step": 49200
},
{
"epoch": 19.83,
"learning_rate": 0.0018882676866254673,
"loss": 1.4002,
"step": 49300
},
{
"epoch": 19.87,
"learning_rate": 0.0018852469688702364,
"loss": 1.3918,
"step": 49400
},
{
"epoch": 19.91,
"learning_rate": 0.0018822212413796408,
"loss": 1.3927,
"step": 49500
},
{
"epoch": 19.95,
"learning_rate": 0.001879190528015413,
"loss": 1.3968,
"step": 49600
},
{
"epoch": 19.99,
"learning_rate": 0.001876154852678608,
"loss": 1.3949,
"step": 49700
},
{
"epoch": 20.0,
"eval_accuracy": 0.41007195900114135,
"eval_loss": 1.3496698141098022,
"eval_runtime": 18.2296,
"eval_samples_per_second": 4364.763,
"eval_steps_per_second": 17.06,
"step": 49735
},
{
"epoch": 20.03,
"learning_rate": 0.0018731142393094097,
"loss": 1.3862,
"step": 49800
},
{
"epoch": 20.07,
"learning_rate": 0.0018700687118869469,
"loss": 1.3861,
"step": 49900
},
{
"epoch": 20.11,
"learning_rate": 0.0018670182944291003,
"loss": 1.3836,
"step": 50000
},
{
"epoch": 20.15,
"learning_rate": 0.0018639630109923166,
"loss": 1.385,
"step": 50100
},
{
"epoch": 20.19,
"learning_rate": 0.0018609028856714155,
"loss": 1.3868,
"step": 50200
},
{
"epoch": 20.23,
"learning_rate": 0.0018578379425994015,
"loss": 1.387,
"step": 50300
},
{
"epoch": 20.27,
"learning_rate": 0.0018547682059472737,
"loss": 1.3889,
"step": 50400
},
{
"epoch": 20.31,
"learning_rate": 0.0018516936999238337,
"loss": 1.3892,
"step": 50500
},
{
"epoch": 20.35,
"learning_rate": 0.0018486144487754972,
"loss": 1.3875,
"step": 50600
},
{
"epoch": 20.39,
"learning_rate": 0.0018455304767861,
"loss": 1.3896,
"step": 50700
},
{
"epoch": 20.43,
"learning_rate": 0.0018424418082767084,
"loss": 1.3891,
"step": 50800
},
{
"epoch": 20.47,
"learning_rate": 0.0018393484676054264,
"loss": 1.3918,
"step": 50900
},
{
"epoch": 20.51,
"learning_rate": 0.0018362504791672044,
"loss": 1.3882,
"step": 51000
},
{
"epoch": 20.55,
"learning_rate": 0.0018331478673936463,
"loss": 1.3859,
"step": 51100
},
{
"epoch": 20.59,
"learning_rate": 0.0018300406567528164,
"loss": 1.3901,
"step": 51200
},
{
"epoch": 20.63,
"learning_rate": 0.0018269288717490475,
"loss": 1.3866,
"step": 51300
},
{
"epoch": 20.67,
"learning_rate": 0.001823812536922747,
"loss": 1.388,
"step": 51400
},
{
"epoch": 20.71,
"learning_rate": 0.0018206916768502036,
"loss": 1.3902,
"step": 51500
},
{
"epoch": 20.75,
"learning_rate": 0.0018175663161433928,
"loss": 1.3894,
"step": 51600
},
{
"epoch": 20.79,
"learning_rate": 0.001814436479449784,
"loss": 1.3872,
"step": 51700
},
{
"epoch": 20.83,
"learning_rate": 0.0018113021914521452,
"loss": 1.3891,
"step": 51800
},
{
"epoch": 20.87,
"learning_rate": 0.0018081634768683486,
"loss": 1.3886,
"step": 51900
},
{
"epoch": 20.91,
"learning_rate": 0.0018050203604511755,
"loss": 1.3868,
"step": 52000
},
{
"epoch": 20.95,
"learning_rate": 0.001801872866988122,
"loss": 1.3863,
"step": 52100
},
{
"epoch": 20.99,
"learning_rate": 0.0017987210213012022,
"loss": 1.3885,
"step": 52200
},
{
"epoch": 21.0,
"eval_accuracy": 0.41060529677158064,
"eval_loss": 1.3434184789657593,
"eval_runtime": 17.9588,
"eval_samples_per_second": 4430.592,
"eval_steps_per_second": 17.317,
"step": 52221
},
{
"epoch": 21.03,
"learning_rate": 0.0017955648482467531,
"loss": 1.3787,
"step": 52300
},
{
"epoch": 21.07,
"learning_rate": 0.001792404372715239,
"loss": 1.3773,
"step": 52400
},
{
"epoch": 21.11,
"learning_rate": 0.0017892396196310535,
"loss": 1.3854,
"step": 52500
},
{
"epoch": 21.15,
"learning_rate": 0.0017860706139523259,
"loss": 1.3818,
"step": 52600
},
{
"epoch": 21.19,
"learning_rate": 0.0017828973806707216,
"loss": 1.3794,
"step": 52700
},
{
"epoch": 21.23,
"learning_rate": 0.0017797199448112463,
"loss": 1.3801,
"step": 52800
},
{
"epoch": 21.27,
"learning_rate": 0.0017765383314320483,
"loss": 1.3782,
"step": 52900
},
{
"epoch": 21.31,
"learning_rate": 0.0017733525656242206,
"loss": 1.3825,
"step": 53000
},
{
"epoch": 21.35,
"learning_rate": 0.0017701626725116044,
"loss": 1.3808,
"step": 53100
},
{
"epoch": 21.39,
"learning_rate": 0.0017669686772505894,
"loss": 1.3836,
"step": 53200
},
{
"epoch": 21.43,
"learning_rate": 0.0017637706050299164,
"loss": 1.3839,
"step": 53300
},
{
"epoch": 21.47,
"learning_rate": 0.001760568481070477,
"loss": 1.3851,
"step": 53400
},
{
"epoch": 21.51,
"learning_rate": 0.001757362330625117,
"loss": 1.3832,
"step": 53500
},
{
"epoch": 21.55,
"learning_rate": 0.001754152178978437,
"loss": 1.3836,
"step": 53600
},
{
"epoch": 21.59,
"learning_rate": 0.0017509380514465903,
"loss": 1.3857,
"step": 53700
},
{
"epoch": 21.63,
"learning_rate": 0.0017477199733770866,
"loss": 1.3842,
"step": 53800
},
{
"epoch": 21.67,
"learning_rate": 0.0017444979701485893,
"loss": 1.3831,
"step": 53900
},
{
"epoch": 21.72,
"learning_rate": 0.0017412720671707183,
"loss": 1.3824,
"step": 54000
},
{
"epoch": 21.76,
"learning_rate": 0.001738042289883847,
"loss": 1.3849,
"step": 54100
},
{
"epoch": 21.8,
"learning_rate": 0.0017348086637589029,
"loss": 1.3819,
"step": 54200
},
{
"epoch": 21.84,
"learning_rate": 0.0017315712142971665,
"loss": 1.3827,
"step": 54300
},
{
"epoch": 21.88,
"learning_rate": 0.00172832996703007,
"loss": 1.3791,
"step": 54400
},
{
"epoch": 21.92,
"learning_rate": 0.0017250849475189966,
"loss": 1.3841,
"step": 54500
},
{
"epoch": 21.96,
"learning_rate": 0.0017218361813550788,
"loss": 1.385,
"step": 54600
},
{
"epoch": 22.0,
"learning_rate": 0.001718583694158995,
"loss": 1.3819,
"step": 54700
},
{
"epoch": 22.0,
"eval_accuracy": 0.4112035980208619,
"eval_loss": 1.3387858867645264,
"eval_runtime": 18.0468,
"eval_samples_per_second": 4408.97,
"eval_steps_per_second": 17.233,
"step": 54708
},
{
"epoch": 22.04,
"learning_rate": 0.0017153275115807693,
"loss": 1.3717,
"step": 54800
},
{
"epoch": 22.08,
"learning_rate": 0.0017120676592995692,
"loss": 1.3738,
"step": 54900
},
{
"epoch": 22.12,
"learning_rate": 0.0017088041630235018,
"loss": 1.3753,
"step": 55000
},
{
"epoch": 22.16,
"learning_rate": 0.0017055370484894122,
"loss": 1.3748,
"step": 55100
},
{
"epoch": 22.2,
"learning_rate": 0.0017022663414626796,
"loss": 1.3736,
"step": 55200
},
{
"epoch": 22.24,
"learning_rate": 0.0016989920677370153,
"loss": 1.3806,
"step": 55300
},
{
"epoch": 22.28,
"learning_rate": 0.0016957142531342578,
"loss": 1.3772,
"step": 55400
},
{
"epoch": 22.32,
"learning_rate": 0.0016924329235041703,
"loss": 1.3804,
"step": 55500
},
{
"epoch": 22.36,
"learning_rate": 0.0016891481047242368,
"loss": 1.3765,
"step": 55600
},
{
"epoch": 22.4,
"learning_rate": 0.0016858598226994572,
"loss": 1.3762,
"step": 55700
},
{
"epoch": 22.44,
"learning_rate": 0.0016825681033621443,
"loss": 1.3767,
"step": 55800
},
{
"epoch": 22.48,
"learning_rate": 0.0016792729726717171,
"loss": 1.375,
"step": 55900
},
{
"epoch": 22.52,
"learning_rate": 0.0016759744566144986,
"loss": 1.3782,
"step": 56000
},
{
"epoch": 22.56,
"learning_rate": 0.0016726725812035098,
"loss": 1.3767,
"step": 56100
},
{
"epoch": 22.6,
"learning_rate": 0.0016693673724782643,
"loss": 1.378,
"step": 56200
},
{
"epoch": 22.64,
"learning_rate": 0.0016660588565045626,
"loss": 1.3797,
"step": 56300
},
{
"epoch": 22.68,
"learning_rate": 0.0016627470593742876,
"loss": 1.376,
"step": 56400
},
{
"epoch": 22.72,
"learning_rate": 0.0016594320072051982,
"loss": 1.3778,
"step": 56500
},
{
"epoch": 22.76,
"learning_rate": 0.0016561137261407242,
"loss": 1.3776,
"step": 56600
},
{
"epoch": 22.8,
"learning_rate": 0.001652792242349758,
"loss": 1.3759,
"step": 56700
},
{
"epoch": 22.84,
"learning_rate": 0.0016494675820264502,
"loss": 1.3764,
"step": 56800
},
{
"epoch": 22.88,
"learning_rate": 0.001646139771390003,
"loss": 1.3788,
"step": 56900
},
{
"epoch": 22.92,
"learning_rate": 0.0016428088366844615,
"loss": 1.3767,
"step": 57000
},
{
"epoch": 22.96,
"learning_rate": 0.0016394748041785093,
"loss": 1.3793,
"step": 57100
},
{
"epoch": 23.0,
"eval_accuracy": 0.4114731876074464,
"eval_loss": 1.3357292413711548,
"eval_runtime": 18.1868,
"eval_samples_per_second": 4375.034,
"eval_steps_per_second": 17.1,
"step": 57195
},
{
"epoch": 23.0,
"learning_rate": 0.0016361377001652606,
"loss": 1.3783,
"step": 57200
},
{
"epoch": 23.04,
"learning_rate": 0.0016327975509620501,
"loss": 1.3643,
"step": 57300
},
{
"epoch": 23.08,
"learning_rate": 0.0016294543829102304,
"loss": 1.3689,
"step": 57400
},
{
"epoch": 23.12,
"learning_rate": 0.0016261082223749596,
"loss": 1.371,
"step": 57500
},
{
"epoch": 23.16,
"learning_rate": 0.0016227590957449958,
"loss": 1.3684,
"step": 57600
},
{
"epoch": 23.2,
"learning_rate": 0.0016194070294324897,
"loss": 1.3739,
"step": 57700
},
{
"epoch": 23.24,
"learning_rate": 0.0016160520498727739,
"loss": 1.3706,
"step": 57800
},
{
"epoch": 23.28,
"learning_rate": 0.001612694183524156,
"loss": 1.3708,
"step": 57900
},
{
"epoch": 23.32,
"learning_rate": 0.0016093334568677096,
"loss": 1.3712,
"step": 58000
},
{
"epoch": 23.36,
"learning_rate": 0.0016059698964070654,
"loss": 1.3712,
"step": 58100
},
{
"epoch": 23.4,
"learning_rate": 0.0016026035286682032,
"loss": 1.3743,
"step": 58200
},
{
"epoch": 23.44,
"learning_rate": 0.0015992343801992403,
"loss": 1.3704,
"step": 58300
},
{
"epoch": 23.48,
"learning_rate": 0.0015958624775702249,
"loss": 1.3716,
"step": 58400
},
{
"epoch": 23.52,
"learning_rate": 0.0015925216070889932,
"loss": 1.3719,
"step": 58500
},
{
"epoch": 23.56,
"learning_rate": 0.00158914430281443,
"loss": 1.3713,
"step": 58600
},
{
"epoch": 23.61,
"learning_rate": 0.001585764323952988,
"loss": 1.3679,
"step": 58700
},
{
"epoch": 23.65,
"learning_rate": 0.0015823816971601262,
"loss": 1.3728,
"step": 58800
},
{
"epoch": 23.69,
"learning_rate": 0.0015789964491121867,
"loss": 1.3732,
"step": 58900
},
{
"epoch": 23.73,
"learning_rate": 0.0015756086065061815,
"loss": 1.3729,
"step": 59000
},
{
"epoch": 23.77,
"learning_rate": 0.001572218196059587,
"loss": 1.3738,
"step": 59100
},
{
"epoch": 23.81,
"learning_rate": 0.0015688252445101281,
"loss": 1.3732,
"step": 59200
},
{
"epoch": 23.85,
"learning_rate": 0.00156542977861557,
"loss": 1.3693,
"step": 59300
},
{
"epoch": 23.89,
"learning_rate": 0.0015620318251535073,
"loss": 1.3726,
"step": 59400
},
{
"epoch": 23.93,
"learning_rate": 0.001558631410921152,
"loss": 1.3702,
"step": 59500
},
{
"epoch": 23.97,
"learning_rate": 0.0015552626031769532,
"loss": 1.3728,
"step": 59600
},
{
"epoch": 24.0,
"eval_accuracy": 0.41196191830249546,
"eval_loss": 1.3301055431365967,
"eval_runtime": 17.9639,
"eval_samples_per_second": 4429.334,
"eval_steps_per_second": 17.313,
"step": 59682
},
{
"epoch": 24.01,
"learning_rate": 0.0015518573718113404,
"loss": 1.3665,
"step": 59700
},
{
"epoch": 24.05,
"learning_rate": 0.001548449759914021,
"loss": 1.3565,
"step": 59800
},
{
"epoch": 24.09,
"learning_rate": 0.0015450397943583764,
"loss": 1.362,
"step": 59900
},
{
"epoch": 24.13,
"learning_rate": 0.0015416275020363487,
"loss": 1.3646,
"step": 60000
},
{
"epoch": 24.17,
"learning_rate": 0.0015382129098582302,
"loss": 1.36,
"step": 60100
},
{
"epoch": 24.21,
"learning_rate": 0.0015347960447524506,
"loss": 1.364,
"step": 60200
},
{
"epoch": 24.25,
"learning_rate": 0.0015313769336653643,
"loss": 1.3669,
"step": 60300
},
{
"epoch": 24.29,
"learning_rate": 0.0015279556035610376,
"loss": 1.3645,
"step": 60400
},
{
"epoch": 24.33,
"learning_rate": 0.0015245320814210378,
"loss": 1.3645,
"step": 60500
},
{
"epoch": 24.37,
"learning_rate": 0.0015211063942442184,
"loss": 1.3657,
"step": 60600
},
{
"epoch": 24.41,
"learning_rate": 0.0015176785690465067,
"loss": 1.3671,
"step": 60700
},
{
"epoch": 24.45,
"learning_rate": 0.001514248632860692,
"loss": 1.3664,
"step": 60800
},
{
"epoch": 24.49,
"learning_rate": 0.0015108166127362102,
"loss": 1.3678,
"step": 60900
},
{
"epoch": 24.53,
"learning_rate": 0.0015073825357389335,
"loss": 1.3682,
"step": 61000
},
{
"epoch": 24.57,
"learning_rate": 0.001503946428950953,
"loss": 1.3646,
"step": 61100
},
{
"epoch": 24.61,
"learning_rate": 0.0015005083194703692,
"loss": 1.3662,
"step": 61200
},
{
"epoch": 24.65,
"learning_rate": 0.0014970682344110752,
"loss": 1.3672,
"step": 61300
},
{
"epoch": 24.69,
"learning_rate": 0.0014936262009025445,
"loss": 1.3652,
"step": 61400
},
{
"epoch": 24.73,
"learning_rate": 0.0014901822460896164,
"loss": 1.366,
"step": 61500
},
{
"epoch": 24.77,
"learning_rate": 0.0014867363971322828,
"loss": 1.3638,
"step": 61600
},
{
"epoch": 24.81,
"learning_rate": 0.0014832886812054728,
"loss": 1.3659,
"step": 61700
},
{
"epoch": 24.85,
"learning_rate": 0.0014798391254988387,
"loss": 1.3647,
"step": 61800
},
{
"epoch": 24.89,
"learning_rate": 0.0014763877572165419,
"loss": 1.3667,
"step": 61900
},
{
"epoch": 24.93,
"learning_rate": 0.0014729691438615455,
"loss": 1.3653,
"step": 62000
},
{
"epoch": 24.97,
"learning_rate": 0.0014695142495437725,
"loss": 1.3671,
"step": 62100
},
{
"epoch": 25.0,
"eval_accuracy": 0.4123754188108745,
"eval_loss": 1.3233122825622559,
"eval_runtime": 21.5586,
"eval_samples_per_second": 3690.779,
"eval_steps_per_second": 14.426,
"step": 62168
},
{
"epoch": 25.01,
"learning_rate": 0.001466057624075198,
"loss": 1.363,
"step": 62200
},
{
"epoch": 25.05,
"learning_rate": 0.0014625992947157381,
"loss": 1.3519,
"step": 62300
},
{
"epoch": 25.09,
"learning_rate": 0.0014591392887387455,
"loss": 1.3502,
"step": 62400
},
{
"epoch": 25.13,
"learning_rate": 0.0014556776334307964,
"loss": 1.3581,
"step": 62500
},
{
"epoch": 25.17,
"learning_rate": 0.0014522143560914728,
"loss": 1.3562,
"step": 62600
},
{
"epoch": 25.21,
"learning_rate": 0.001448749484033149,
"loss": 1.3593,
"step": 62700
},
{
"epoch": 25.25,
"learning_rate": 0.0014452830445807752,
"loss": 1.3583,
"step": 62800
},
{
"epoch": 25.29,
"learning_rate": 0.0014418150650716635,
"loss": 1.3601,
"step": 62900
},
{
"epoch": 25.33,
"learning_rate": 0.0014383455728552708,
"loss": 1.3611,
"step": 63000
},
{
"epoch": 25.37,
"learning_rate": 0.0014348745952929838,
"loss": 1.36,
"step": 63100
},
{
"epoch": 25.41,
"learning_rate": 0.0014314021597579025,
"loss": 1.3599,
"step": 63200
},
{
"epoch": 25.45,
"learning_rate": 0.0014279282936346253,
"loss": 1.3618,
"step": 63300
},
{
"epoch": 25.5,
"learning_rate": 0.0014244530243190323,
"loss": 1.3617,
"step": 63400
},
{
"epoch": 25.54,
"learning_rate": 0.0014209763792180708,
"loss": 1.3595,
"step": 63500
},
{
"epoch": 25.58,
"learning_rate": 0.001417498385749536,
"loss": 1.361,
"step": 63600
},
{
"epoch": 25.62,
"learning_rate": 0.0014140190713418583,
"loss": 1.355,
"step": 63700
},
{
"epoch": 25.66,
"learning_rate": 0.0014105384634338846,
"loss": 1.3624,
"step": 63800
},
{
"epoch": 25.7,
"learning_rate": 0.0014070565894746625,
"loss": 1.3633,
"step": 63900
},
{
"epoch": 25.74,
"learning_rate": 0.001403573476923225,
"loss": 1.3595,
"step": 64000
},
{
"epoch": 25.78,
"learning_rate": 0.0014000891532483723,
"loss": 1.3595,
"step": 64100
},
{
"epoch": 25.82,
"learning_rate": 0.0013966036459284556,
"loss": 1.3603,
"step": 64200
},
{
"epoch": 25.86,
"learning_rate": 0.001393116982451161,
"loss": 1.361,
"step": 64300
},
{
"epoch": 25.9,
"learning_rate": 0.001389629190313293,
"loss": 1.3588,
"step": 64400
},
{
"epoch": 25.94,
"learning_rate": 0.001386140297020556,
"loss": 1.3593,
"step": 64500
},
{
"epoch": 25.98,
"learning_rate": 0.0013826503300873386,
"loss": 1.3586,
"step": 64600
},
{
"epoch": 26.0,
"eval_accuracy": 0.41281971944273727,
"eval_loss": 1.3182361125946045,
"eval_runtime": 18.1839,
"eval_samples_per_second": 4375.747,
"eval_steps_per_second": 17.103,
"step": 64655
},
{
"epoch": 26.02,
"learning_rate": 0.0013791593170364973,
"loss": 1.3551,
"step": 64700
},
{
"epoch": 26.06,
"learning_rate": 0.0013756672853991376,
"loss": 1.3495,
"step": 64800
},
{
"epoch": 26.1,
"learning_rate": 0.0013721742627143979,
"loss": 1.3501,
"step": 64900
},
{
"epoch": 26.14,
"learning_rate": 0.0013686802765292333,
"loss": 1.3509,
"step": 65000
},
{
"epoch": 26.18,
"learning_rate": 0.0013651853543981964,
"loss": 1.3532,
"step": 65100
},
{
"epoch": 26.22,
"learning_rate": 0.0013616895238832208,
"loss": 1.3516,
"step": 65200
},
{
"epoch": 26.26,
"learning_rate": 0.0013581928125534046,
"loss": 1.3548,
"step": 65300
},
{
"epoch": 26.3,
"learning_rate": 0.0013546952479847913,
"loss": 1.3533,
"step": 65400
},
{
"epoch": 26.34,
"learning_rate": 0.0013511968577601544,
"loss": 1.3521,
"step": 65500
},
{
"epoch": 26.38,
"learning_rate": 0.001347697669468778,
"loss": 1.3535,
"step": 65600
},
{
"epoch": 26.42,
"learning_rate": 0.0013441977107062405,
"loss": 1.3553,
"step": 65700
},
{
"epoch": 26.46,
"learning_rate": 0.001340697009074196,
"loss": 1.3534,
"step": 65800
},
{
"epoch": 26.5,
"learning_rate": 0.0013371955921801565,
"loss": 1.3545,
"step": 65900
},
{
"epoch": 26.54,
"learning_rate": 0.0013336934876372766,
"loss": 1.355,
"step": 66000
},
{
"epoch": 26.58,
"learning_rate": 0.001330190723064132,
"loss": 1.3519,
"step": 66100
},
{
"epoch": 26.62,
"learning_rate": 0.0013266873260845047,
"loss": 1.352,
"step": 66200
},
{
"epoch": 26.66,
"learning_rate": 0.0013231833243271636,
"loss": 1.3541,
"step": 66300
},
{
"epoch": 26.7,
"learning_rate": 0.0013196787454256467,
"loss": 1.3526,
"step": 66400
},
{
"epoch": 26.74,
"learning_rate": 0.0013161736170180446,
"loss": 1.3578,
"step": 66500
},
{
"epoch": 26.78,
"learning_rate": 0.0013126679667467805,
"loss": 1.3519,
"step": 66600
},
{
"epoch": 26.82,
"learning_rate": 0.0013093020771800632,
"loss": 1.3564,
"step": 66700
},
{
"epoch": 26.86,
"learning_rate": 0.0013057954842567222,
"loss": 1.3507,
"step": 66800
},
{
"epoch": 26.9,
"learning_rate": 0.0013022884513145783,
"loss": 1.3546,
"step": 66900
},
{
"epoch": 26.94,
"learning_rate": 0.0012987810060110744,
"loss": 1.3533,
"step": 67000
},
{
"epoch": 26.98,
"learning_rate": 0.0012952731760069067,
"loss": 1.354,
"step": 67100
},
{
"epoch": 27.0,
"eval_accuracy": 0.4132404774514774,
"eval_loss": 1.3120365142822266,
"eval_runtime": 18.1888,
"eval_samples_per_second": 4374.571,
"eval_steps_per_second": 17.098,
"step": 67142
},
{
"epoch": 27.02,
"learning_rate": 0.0012917649889658037,
"loss": 1.3444,
"step": 67200
},
{
"epoch": 27.06,
"learning_rate": 0.0012882564725543097,
"loss": 1.3432,
"step": 67300
},
{
"epoch": 27.1,
"learning_rate": 0.0012847476544415677,
"loss": 1.3426,
"step": 67400
},
{
"epoch": 27.14,
"learning_rate": 0.0012812385622990989,
"loss": 1.341,
"step": 67500
},
{
"epoch": 27.18,
"learning_rate": 0.0012777292238005855,
"loss": 1.3417,
"step": 67600
},
{
"epoch": 27.22,
"learning_rate": 0.0012742196666216538,
"loss": 1.3482,
"step": 67700
},
{
"epoch": 27.26,
"learning_rate": 0.0012707099184396534,
"loss": 1.3497,
"step": 67800
},
{
"epoch": 27.3,
"learning_rate": 0.0012672000069334408,
"loss": 1.348,
"step": 67900
},
{
"epoch": 27.34,
"learning_rate": 0.0012636899597831602,
"loss": 1.3479,
"step": 68000
},
{
"epoch": 27.39,
"learning_rate": 0.0012601798046700254,
"loss": 1.3492,
"step": 68100
},
{
"epoch": 27.43,
"learning_rate": 0.0012566695692761026,
"loss": 1.3451,
"step": 68200
},
{
"epoch": 27.47,
"learning_rate": 0.00125315928128409,
"loss": 1.3485,
"step": 68300
},
{
"epoch": 27.51,
"learning_rate": 0.001249648968377101,
"loss": 1.3481,
"step": 68400
},
{
"epoch": 27.55,
"learning_rate": 0.0012461386582384459,
"loss": 1.3471,
"step": 68500
},
{
"epoch": 27.59,
"learning_rate": 0.0012426283785514122,
"loss": 1.347,
"step": 68600
},
{
"epoch": 27.63,
"learning_rate": 0.0012391181569990483,
"loss": 1.3473,
"step": 68700
},
{
"epoch": 27.67,
"learning_rate": 0.0012356080212639429,
"loss": 1.3478,
"step": 68800
},
{
"epoch": 27.71,
"learning_rate": 0.0012320979990280098,
"loss": 1.3484,
"step": 68900
},
{
"epoch": 27.75,
"learning_rate": 0.0012285881179722661,
"loss": 1.3476,
"step": 69000
},
{
"epoch": 27.79,
"learning_rate": 0.001225078405776616,
"loss": 1.3486,
"step": 69100
},
{
"epoch": 27.83,
"learning_rate": 0.0012215688901196322,
"loss": 1.3485,
"step": 69200
},
{
"epoch": 27.87,
"learning_rate": 0.0012180595986783368,
"loss": 1.3457,
"step": 69300
},
{
"epoch": 27.91,
"learning_rate": 0.0012145505591279848,
"loss": 1.3455,
"step": 69400
},
{
"epoch": 27.95,
"learning_rate": 0.0012110417991418438,
"loss": 1.349,
"step": 69500
},
{
"epoch": 27.99,
"learning_rate": 0.0012075333463909769,
"loss": 1.3472,
"step": 69600
},
{
"epoch": 28.0,
"eval_accuracy": 0.41376142436764163,
"eval_loss": 1.304537057876587,
"eval_runtime": 17.96,
"eval_samples_per_second": 4430.295,
"eval_steps_per_second": 17.316,
"step": 69629
},
{
"epoch": 28.03,
"learning_rate": 0.001204025228544024,
"loss": 1.338,
"step": 69700
},
{
"epoch": 28.07,
"learning_rate": 0.0012005174732669848,
"loss": 1.3342,
"step": 69800
},
{
"epoch": 28.11,
"learning_rate": 0.0011970101082229981,
"loss": 1.3388,
"step": 69900
},
{
"epoch": 28.15,
"learning_rate": 0.0011935031610721273,
"loss": 1.34,
"step": 70000
},
{
"epoch": 28.19,
"learning_rate": 0.0011899966594711383,
"loss": 1.3389,
"step": 70100
},
{
"epoch": 28.23,
"learning_rate": 0.0011864906310732845,
"loss": 1.338,
"step": 70200
},
{
"epoch": 28.27,
"learning_rate": 0.0011829851035280864,
"loss": 1.3411,
"step": 70300
},
{
"epoch": 28.31,
"learning_rate": 0.0011794801044811157,
"loss": 1.3373,
"step": 70400
},
{
"epoch": 28.35,
"learning_rate": 0.0011759756615737753,
"loss": 1.3411,
"step": 70500
},
{
"epoch": 28.39,
"learning_rate": 0.0011724718024430827,
"loss": 1.3412,
"step": 70600
},
{
"epoch": 28.43,
"learning_rate": 0.0011689685547214518,
"loss": 1.3413,
"step": 70700
},
{
"epoch": 28.47,
"learning_rate": 0.0011654659460364745,
"loss": 1.3395,
"step": 70800
},
{
"epoch": 28.51,
"learning_rate": 0.0011619640040107027,
"loss": 1.3418,
"step": 70900
},
{
"epoch": 28.55,
"learning_rate": 0.0011584627562614316,
"loss": 1.3402,
"step": 71000
},
{
"epoch": 28.59,
"learning_rate": 0.0011549622304004806,
"loss": 1.3412,
"step": 71100
},
{
"epoch": 28.63,
"learning_rate": 0.0011514624540339764,
"loss": 1.3396,
"step": 71200
},
{
"epoch": 28.67,
"learning_rate": 0.0011479634547621343,
"loss": 1.339,
"step": 71300
},
{
"epoch": 28.71,
"learning_rate": 0.0011444652601790425,
"loss": 1.3399,
"step": 71400
},
{
"epoch": 28.75,
"learning_rate": 0.0011409678978724417,
"loss": 1.3419,
"step": 71500
},
{
"epoch": 28.79,
"learning_rate": 0.0011374713954235103,
"loss": 1.3423,
"step": 71600
},
{
"epoch": 28.83,
"learning_rate": 0.0011339757804066446,
"loss": 1.3402,
"step": 71700
},
{
"epoch": 28.87,
"learning_rate": 0.0011304810803892432,
"loss": 1.3436,
"step": 71800
},
{
"epoch": 28.91,
"learning_rate": 0.0011269873229314876,
"loss": 1.3402,
"step": 71900
},
{
"epoch": 28.95,
"learning_rate": 0.0011234945355861282,
"loss": 1.3406,
"step": 72000
},
{
"epoch": 28.99,
"learning_rate": 0.0011200027458982628,
"loss": 1.3393,
"step": 72100
},
{
"epoch": 29.0,
"eval_accuracy": 0.4142487389650592,
"eval_loss": 1.2993851900100708,
"eval_runtime": 18.3165,
"eval_samples_per_second": 4344.051,
"eval_steps_per_second": 16.979,
"step": 72115
},
{
"epoch": 29.03,
"learning_rate": 0.0011165119814051218,
"loss": 1.3309,
"step": 72200
},
{
"epoch": 29.07,
"learning_rate": 0.0011130222696358512,
"loss": 1.3314,
"step": 72300
},
{
"epoch": 29.11,
"learning_rate": 0.001109533638111294,
"loss": 1.3332,
"step": 72400
},
{
"epoch": 29.15,
"learning_rate": 0.0011060461143437754,
"loss": 1.3335,
"step": 72500
},
{
"epoch": 29.19,
"learning_rate": 0.0011025597258368834,
"loss": 1.3329,
"step": 72600
},
{
"epoch": 29.23,
"learning_rate": 0.0010990745000852534,
"loss": 1.3321,
"step": 72700
},
{
"epoch": 29.28,
"learning_rate": 0.001095590464574351,
"loss": 1.3342,
"step": 72800
},
{
"epoch": 29.32,
"learning_rate": 0.001092107646780255,
"loss": 1.3315,
"step": 72900
},
{
"epoch": 29.36,
"learning_rate": 0.0010886260741694415,
"loss": 1.3332,
"step": 73000
},
{
"epoch": 29.4,
"learning_rate": 0.0010851457741985666,
"loss": 1.3323,
"step": 73100
},
{
"epoch": 29.44,
"learning_rate": 0.0010816667743142494,
"loss": 1.3331,
"step": 73200
},
{
"epoch": 29.48,
"learning_rate": 0.0010781891019528568,
"loss": 1.3314,
"step": 73300
},
{
"epoch": 29.52,
"learning_rate": 0.001074712784540286,
"loss": 1.3359,
"step": 73400
},
{
"epoch": 29.56,
"learning_rate": 0.0010712378494917494,
"loss": 1.3325,
"step": 73500
},
{
"epoch": 29.6,
"learning_rate": 0.0010677643242115566,
"loss": 1.3383,
"step": 73600
},
{
"epoch": 29.64,
"learning_rate": 0.0010642922360929003,
"loss": 1.3327,
"step": 73700
},
{
"epoch": 29.68,
"learning_rate": 0.001060821612517639,
"loss": 1.3328,
"step": 73800
},
{
"epoch": 29.72,
"learning_rate": 0.0010573524808560812,
"loss": 1.3312,
"step": 73900
},
{
"epoch": 29.76,
"learning_rate": 0.0010538848684667704,
"loss": 1.3339,
"step": 74000
},
{
"epoch": 29.8,
"learning_rate": 0.0010504188026962684,
"loss": 1.3313,
"step": 74100
},
{
"epoch": 29.84,
"learning_rate": 0.0010469543108789395,
"loss": 1.3321,
"step": 74200
},
{
"epoch": 29.88,
"learning_rate": 0.0010434914203367358,
"loss": 1.3363,
"step": 74300
},
{
"epoch": 29.92,
"learning_rate": 0.001040030158378981,
"loss": 1.3312,
"step": 74400
},
{
"epoch": 29.96,
"learning_rate": 0.0010365705523021557,
"loss": 1.3335,
"step": 74500
},
{
"epoch": 30.0,
"learning_rate": 0.0010331126293896813,
"loss": 1.3349,
"step": 74600
},
{
"epoch": 30.0,
"eval_accuracy": 0.4148599621052274,
"eval_loss": 1.2924647331237793,
"eval_runtime": 18.1541,
"eval_samples_per_second": 4382.932,
"eval_steps_per_second": 17.131,
"step": 74602
},
{
"epoch": 30.04,
"learning_rate": 0.001029656416911706,
"loss": 1.3219,
"step": 74700
},
{
"epoch": 30.08,
"learning_rate": 0.0010262019421248881,
"loss": 1.3239,
"step": 74800
},
{
"epoch": 30.12,
"learning_rate": 0.0010227492322721827,
"loss": 1.3254,
"step": 74900
},
{
"epoch": 30.16,
"learning_rate": 0.0010192983145826263,
"loss": 1.3246,
"step": 75000
},
{
"epoch": 30.2,
"learning_rate": 0.0010158492162711211,
"loss": 1.3219,
"step": 75100
},
{
"epoch": 30.24,
"learning_rate": 0.001012436427825709,
"loss": 1.3249,
"step": 75200
},
{
"epoch": 30.28,
"learning_rate": 0.0010089910309852388,
"loss": 1.3267,
"step": 75300
},
{
"epoch": 30.32,
"learning_rate": 0.0010055475348089424,
"loss": 1.3274,
"step": 75400
},
{
"epoch": 30.36,
"learning_rate": 0.001002105966453195,
"loss": 1.3264,
"step": 75500
},
{
"epoch": 30.4,
"learning_rate": 0.0009986663530591672,
"loss": 1.3272,
"step": 75600
},
{
"epoch": 30.44,
"learning_rate": 0.0009952287217526129,
"loss": 1.3283,
"step": 75700
},
{
"epoch": 30.48,
"learning_rate": 0.0009917930996436546,
"loss": 1.3314,
"step": 75800
},
{
"epoch": 30.52,
"learning_rate": 0.0009883595138265695,
"loss": 1.325,
"step": 75900
},
{
"epoch": 30.56,
"learning_rate": 0.0009849279913795768,
"loss": 1.3265,
"step": 76000
},
{
"epoch": 30.6,
"learning_rate": 0.0009814985593646216,
"loss": 1.3253,
"step": 76100
},
{
"epoch": 30.64,
"learning_rate": 0.000978071244827166,
"loss": 1.3291,
"step": 76200
},
{
"epoch": 30.68,
"learning_rate": 0.0009746460747959707,
"loss": 1.3277,
"step": 76300
},
{
"epoch": 30.72,
"learning_rate": 0.0009712230762828853,
"loss": 1.3246,
"step": 76400
},
{
"epoch": 30.76,
"learning_rate": 0.0009678022762826335,
"loss": 1.3267,
"step": 76500
},
{
"epoch": 30.8,
"learning_rate": 0.0009643837017726021,
"loss": 1.3257,
"step": 76600
},
{
"epoch": 30.84,
"learning_rate": 0.0009609673797126259,
"loss": 1.3273,
"step": 76700
},
{
"epoch": 30.88,
"learning_rate": 0.0009575533370447768,
"loss": 1.3245,
"step": 76800
},
{
"epoch": 30.92,
"learning_rate": 0.0009541416006931501,
"loss": 1.3252,
"step": 76900
},
{
"epoch": 30.96,
"learning_rate": 0.0009507321975636546,
"loss": 1.3282,
"step": 77000
},
{
"epoch": 31.0,
"eval_accuracy": 0.4151056550442814,
"eval_loss": 1.2873921394348145,
"eval_runtime": 17.9971,
"eval_samples_per_second": 4421.147,
"eval_steps_per_second": 17.281,
"step": 77089
},
{
"epoch": 31.0,
"learning_rate": 0.0009473251545437967,
"loss": 1.3231,
"step": 77100
},
{
"epoch": 31.04,
"learning_rate": 0.0009439204985024713,
"loss": 1.3123,
"step": 77200
},
{
"epoch": 31.08,
"learning_rate": 0.0009405182562897486,
"loss": 1.3151,
"step": 77300
},
{
"epoch": 31.12,
"learning_rate": 0.000937118454736663,
"loss": 1.3136,
"step": 77400
},
{
"epoch": 31.17,
"learning_rate": 0.0009337211206550006,
"loss": 1.3155,
"step": 77500
},
{
"epoch": 31.21,
"learning_rate": 0.0009303262808370891,
"loss": 1.3187,
"step": 77600
},
{
"epoch": 31.25,
"learning_rate": 0.0009269339620555848,
"loss": 1.3211,
"step": 77700
},
{
"epoch": 31.29,
"learning_rate": 0.0009235441910632633,
"loss": 1.323,
"step": 77800
},
{
"epoch": 31.33,
"learning_rate": 0.0009201569945928068,
"loss": 1.3187,
"step": 77900
},
{
"epoch": 31.37,
"learning_rate": 0.0009167723993565953,
"loss": 1.3142,
"step": 78000
},
{
"epoch": 31.41,
"learning_rate": 0.0009133904320464937,
"loss": 1.3196,
"step": 78100
},
{
"epoch": 31.45,
"learning_rate": 0.0009100111193336424,
"loss": 1.3236,
"step": 78200
},
{
"epoch": 31.49,
"learning_rate": 0.0009066344878682472,
"loss": 1.3203,
"step": 78300
},
{
"epoch": 31.53,
"learning_rate": 0.000903260564279369,
"loss": 1.3221,
"step": 78400
},
{
"epoch": 31.57,
"learning_rate": 0.0008998893751747131,
"loss": 1.3191,
"step": 78500
},
{
"epoch": 31.61,
"learning_rate": 0.0008965209471404204,
"loss": 1.3198,
"step": 78600
},
{
"epoch": 31.65,
"learning_rate": 0.0008931553067408565,
"loss": 1.3167,
"step": 78700
},
{
"epoch": 31.69,
"learning_rate": 0.000889792480518404,
"loss": 1.3185,
"step": 78800
},
{
"epoch": 31.73,
"learning_rate": 0.0008864324949932512,
"loss": 1.3217,
"step": 78900
},
{
"epoch": 31.77,
"learning_rate": 0.0008830753766631847,
"loss": 1.3212,
"step": 79000
},
{
"epoch": 31.81,
"learning_rate": 0.0008797211520033786,
"loss": 1.3209,
"step": 79100
},
{
"epoch": 31.85,
"learning_rate": 0.0008764033459701526,
"loss": 1.3207,
"step": 79200
},
{
"epoch": 31.89,
"learning_rate": 0.0008730549583886335,
"loss": 1.3185,
"step": 79300
},
{
"epoch": 31.93,
"learning_rate": 0.0008697095435011976,
"loss": 1.3145,
"step": 79400
},
{
"epoch": 31.97,
"learning_rate": 0.000866367127690723,
"loss": 1.3183,
"step": 79500
},
{
"epoch": 32.0,
"eval_accuracy": 0.4158149429454264,
"eval_loss": 1.2801185846328735,
"eval_runtime": 18.1222,
"eval_samples_per_second": 4390.647,
"eval_steps_per_second": 17.161,
"step": 79576
},
{
"epoch": 32.01,
"learning_rate": 0.0008630277373164358,
"loss": 1.3178,
"step": 79600
},
{
"epoch": 32.05,
"learning_rate": 0.0008596913987137034,
"loss": 1.3029,
"step": 79700
},
{
"epoch": 32.09,
"learning_rate": 0.0008563581381938254,
"loss": 1.3081,
"step": 79800
},
{
"epoch": 32.13,
"learning_rate": 0.000853161128356408,
"loss": 1.3116,
"step": 79900
},
{
"epoch": 32.17,
"learning_rate": 0.0008498339771096045,
"loss": 1.3085,
"step": 80000
},
{
"epoch": 32.21,
"learning_rate": 0.0008465099816840408,
"loss": 1.3123,
"step": 80100
},
{
"epoch": 32.25,
"learning_rate": 0.0008431891682936748,
"loss": 1.3101,
"step": 80200
},
{
"epoch": 32.29,
"learning_rate": 0.0008398715631273706,
"loss": 1.3127,
"step": 80300
},
{
"epoch": 32.33,
"learning_rate": 0.0008365571923486908,
"loss": 1.3101,
"step": 80400
},
{
"epoch": 32.37,
"learning_rate": 0.0008332460820956902,
"loss": 1.3141,
"step": 80500
},
{
"epoch": 32.41,
"learning_rate": 0.0008299382584807122,
"loss": 1.314,
"step": 80600
},
{
"epoch": 32.45,
"learning_rate": 0.0008266337475901788,
"loss": 1.3147,
"step": 80700
},
{
"epoch": 32.49,
"learning_rate": 0.0008233325754843877,
"loss": 1.3147,
"step": 80800
},
{
"epoch": 32.53,
"learning_rate": 0.0008200347681973055,
"loss": 1.3107,
"step": 80900
},
{
"epoch": 32.57,
"learning_rate": 0.0008167403517363639,
"loss": 1.3118,
"step": 81000
},
{
"epoch": 32.61,
"learning_rate": 0.0008134493520822528,
"loss": 1.3101,
"step": 81100
},
{
"epoch": 32.65,
"learning_rate": 0.0008101617951887165,
"loss": 1.3089,
"step": 81200
},
{
"epoch": 32.69,
"learning_rate": 0.0008068777069823485,
"loss": 1.3154,
"step": 81300
},
{
"epoch": 32.73,
"learning_rate": 0.0008035971133623876,
"loss": 1.3102,
"step": 81400
},
{
"epoch": 32.77,
"learning_rate": 0.0008003200402005126,
"loss": 1.3097,
"step": 81500
},
{
"epoch": 32.81,
"learning_rate": 0.0007970465133406403,
"loss": 1.3114,
"step": 81600
},
{
"epoch": 32.85,
"learning_rate": 0.0007937765585987194,
"loss": 1.3105,
"step": 81700
},
{
"epoch": 32.89,
"learning_rate": 0.0007905102017625279,
"loss": 1.3128,
"step": 81800
},
{
"epoch": 32.93,
"learning_rate": 0.0007872474685914699,
"loss": 1.3114,
"step": 81900
},
{
"epoch": 32.97,
"learning_rate": 0.0007839883848163723,
"loss": 1.3111,
"step": 82000
},
{
"epoch": 33.0,
"eval_accuracy": 0.4163647428508311,
"eval_loss": 1.2729204893112183,
"eval_runtime": 18.2365,
"eval_samples_per_second": 4363.111,
"eval_steps_per_second": 17.054,
"step": 82062
},
{
"epoch": 33.01,
"learning_rate": 0.0007807329761392816,
"loss": 1.3076,
"step": 82100
},
{
"epoch": 33.06,
"learning_rate": 0.0007774812682332619,
"loss": 1.2998,
"step": 82200
},
{
"epoch": 33.1,
"learning_rate": 0.0007742332867421911,
"loss": 1.3002,
"step": 82300
},
{
"epoch": 33.14,
"learning_rate": 0.000770989057280561,
"loss": 1.2987,
"step": 82400
},
{
"epoch": 33.18,
"learning_rate": 0.0007677486054332725,
"loss": 1.3055,
"step": 82500
},
{
"epoch": 33.22,
"learning_rate": 0.0007645119567554358,
"loss": 1.3044,
"step": 82600
},
{
"epoch": 33.26,
"learning_rate": 0.0007612791367721684,
"loss": 1.3029,
"step": 82700
},
{
"epoch": 33.3,
"learning_rate": 0.0007580501709783928,
"loss": 1.308,
"step": 82800
},
{
"epoch": 33.34,
"learning_rate": 0.0007548250848386371,
"loss": 1.3032,
"step": 82900
},
{
"epoch": 33.38,
"learning_rate": 0.0007516039037868335,
"loss": 1.3025,
"step": 83000
},
{
"epoch": 33.42,
"learning_rate": 0.0007483866532261166,
"loss": 1.3047,
"step": 83100
},
{
"epoch": 33.46,
"learning_rate": 0.0007451733585286252,
"loss": 1.3058,
"step": 83200
},
{
"epoch": 33.5,
"learning_rate": 0.0007419640450352999,
"loss": 1.3028,
"step": 83300
},
{
"epoch": 33.54,
"learning_rate": 0.0007387587380556856,
"loss": 1.3025,
"step": 83400
},
{
"epoch": 33.58,
"learning_rate": 0.0007355574628677295,
"loss": 1.3071,
"step": 83500
},
{
"epoch": 33.62,
"learning_rate": 0.0007323602447175839,
"loss": 1.3048,
"step": 83600
},
{
"epoch": 33.66,
"learning_rate": 0.0007291671088194063,
"loss": 1.3062,
"step": 83700
},
{
"epoch": 33.7,
"learning_rate": 0.0007259780803551594,
"loss": 1.3064,
"step": 83800
},
{
"epoch": 33.74,
"learning_rate": 0.0007227931844744142,
"loss": 1.301,
"step": 83900
},
{
"epoch": 33.78,
"learning_rate": 0.0007196124462941508,
"loss": 1.3034,
"step": 84000
},
{
"epoch": 33.82,
"learning_rate": 0.0007164358908985607,
"loss": 1.3039,
"step": 84100
},
{
"epoch": 33.86,
"learning_rate": 0.0007132635433388489,
"loss": 1.3041,
"step": 84200
},
{
"epoch": 33.9,
"learning_rate": 0.0007100954286330352,
"loss": 1.3004,
"step": 84300
},
{
"epoch": 33.94,
"learning_rate": 0.0007069315717657598,
"loss": 1.3056,
"step": 84400
},
{
"epoch": 33.98,
"learning_rate": 0.0007037719976880829,
"loss": 1.3033,
"step": 84500
},
{
"epoch": 34.0,
"eval_accuracy": 0.4169924281259647,
"eval_loss": 1.2650080919265747,
"eval_runtime": 18.2265,
"eval_samples_per_second": 4365.502,
"eval_steps_per_second": 17.063,
"step": 84549
},
{
"epoch": 34.02,
"learning_rate": 0.00070061673131729,
"loss": 1.2983,
"step": 84600
},
{
"epoch": 34.06,
"learning_rate": 0.0006974657975366943,
"loss": 1.2906,
"step": 84700
},
{
"epoch": 34.1,
"learning_rate": 0.0006943192211954416,
"loss": 1.2921,
"step": 84800
},
{
"epoch": 34.14,
"learning_rate": 0.0006911770271083132,
"loss": 1.2937,
"step": 84900
},
{
"epoch": 34.18,
"learning_rate": 0.0006880392400555313,
"loss": 1.2936,
"step": 85000
},
{
"epoch": 34.22,
"learning_rate": 0.0006849058847825628,
"loss": 1.2942,
"step": 85100
},
{
"epoch": 34.26,
"learning_rate": 0.0006817769859999242,
"loss": 1.293,
"step": 85200
},
{
"epoch": 34.3,
"learning_rate": 0.0006786525683829867,
"loss": 1.2976,
"step": 85300
},
{
"epoch": 34.34,
"learning_rate": 0.0006755326565717819,
"loss": 1.2933,
"step": 85400
},
{
"epoch": 34.38,
"learning_rate": 0.0006724172751708072,
"loss": 1.2956,
"step": 85500
},
{
"epoch": 34.42,
"learning_rate": 0.0006693064487488328,
"loss": 1.297,
"step": 85600
},
{
"epoch": 34.46,
"learning_rate": 0.0006662002018387062,
"loss": 1.2965,
"step": 85700
},
{
"epoch": 34.5,
"learning_rate": 0.0006630985589371597,
"loss": 1.296,
"step": 85800
},
{
"epoch": 34.54,
"learning_rate": 0.000660001544504617,
"loss": 1.2949,
"step": 85900
},
{
"epoch": 34.58,
"learning_rate": 0.0006569091829650009,
"loss": 1.2962,
"step": 86000
},
{
"epoch": 34.62,
"learning_rate": 0.0006538214987055396,
"loss": 1.2964,
"step": 86100
},
{
"epoch": 34.66,
"learning_rate": 0.0006507385160765756,
"loss": 1.2985,
"step": 86200
},
{
"epoch": 34.7,
"learning_rate": 0.0006476602593913723,
"loss": 1.2986,
"step": 86300
},
{
"epoch": 34.74,
"learning_rate": 0.0006445867529259235,
"loss": 1.295,
"step": 86400
},
{
"epoch": 34.78,
"learning_rate": 0.0006415180209187613,
"loss": 1.2955,
"step": 86500
},
{
"epoch": 34.82,
"learning_rate": 0.0006384540875707645,
"loss": 1.2943,
"step": 86600
},
{
"epoch": 34.86,
"learning_rate": 0.00063539497704497,
"loss": 1.2999,
"step": 86700
},
{
"epoch": 34.9,
"learning_rate": 0.0006323407134663786,
"loss": 1.2954,
"step": 86800
},
{
"epoch": 34.95,
"learning_rate": 0.0006292913209217682,
"loss": 1.2961,
"step": 86900
},
{
"epoch": 34.99,
"learning_rate": 0.0006262468234595013,
"loss": 1.2966,
"step": 87000
},
{
"epoch": 35.0,
"eval_accuracy": 0.4174700070521662,
"eval_loss": 1.2581887245178223,
"eval_runtime": 18.1816,
"eval_samples_per_second": 4376.288,
"eval_steps_per_second": 17.105,
"step": 87036
},
{
"epoch": 35.03,
"learning_rate": 0.0006232072450893371,
"loss": 1.2863,
"step": 87100
},
{
"epoch": 35.07,
"learning_rate": 0.0006201726097822411,
"loss": 1.2824,
"step": 87200
},
{
"epoch": 35.11,
"learning_rate": 0.0006171429414701966,
"loss": 1.2847,
"step": 87300
},
{
"epoch": 35.15,
"learning_rate": 0.0006141182640460158,
"loss": 1.2874,
"step": 87400
},
{
"epoch": 35.19,
"learning_rate": 0.0006110986013631511,
"loss": 1.2868,
"step": 87500
},
{
"epoch": 35.23,
"learning_rate": 0.0006080839772355072,
"loss": 1.288,
"step": 87600
},
{
"epoch": 35.27,
"learning_rate": 0.0006050744154372545,
"loss": 1.2858,
"step": 87700
},
{
"epoch": 35.31,
"learning_rate": 0.0006020699397026388,
"loss": 1.2898,
"step": 87800
},
{
"epoch": 35.35,
"learning_rate": 0.000599070573725797,
"loss": 1.2871,
"step": 87900
},
{
"epoch": 35.39,
"learning_rate": 0.0005960763411605681,
"loss": 1.2868,
"step": 88000
},
{
"epoch": 35.43,
"learning_rate": 0.0005930872656203081,
"loss": 1.2918,
"step": 88100
},
{
"epoch": 35.47,
"learning_rate": 0.0005901033706777035,
"loss": 1.2867,
"step": 88200
},
{
"epoch": 35.51,
"learning_rate": 0.0005871246798645846,
"loss": 1.288,
"step": 88300
},
{
"epoch": 35.55,
"learning_rate": 0.0005841512166717406,
"loss": 1.2896,
"step": 88400
},
{
"epoch": 35.59,
"learning_rate": 0.0005811830045487345,
"loss": 1.2879,
"step": 88500
},
{
"epoch": 35.63,
"learning_rate": 0.0005782200669037174,
"loss": 1.2887,
"step": 88600
},
{
"epoch": 35.67,
"learning_rate": 0.0005752624271032452,
"loss": 1.2855,
"step": 88700
},
{
"epoch": 35.71,
"learning_rate": 0.0005723101084720932,
"loss": 1.2858,
"step": 88800
},
{
"epoch": 35.75,
"learning_rate": 0.0005693631342930718,
"loss": 1.2873,
"step": 88900
},
{
"epoch": 35.79,
"learning_rate": 0.0005664215278068442,
"loss": 1.2889,
"step": 89000
},
{
"epoch": 35.83,
"learning_rate": 0.0005634853122117424,
"loss": 1.2868,
"step": 89100
},
{
"epoch": 35.87,
"learning_rate": 0.0005605545106635842,
"loss": 1.2876,
"step": 89200
},
{
"epoch": 35.91,
"learning_rate": 0.0005576291462754908,
"loss": 1.2869,
"step": 89300
},
{
"epoch": 35.95,
"learning_rate": 0.0005547092421177046,
"loss": 1.2883,
"step": 89400
},
{
"epoch": 35.99,
"learning_rate": 0.0005517948212174065,
"loss": 1.2858,
"step": 89500
},
{
"epoch": 36.0,
"eval_accuracy": 0.41807255659434184,
"eval_loss": 1.250754952430725,
"eval_runtime": 18.2293,
"eval_samples_per_second": 4364.838,
"eval_steps_per_second": 17.06,
"step": 89523
},
{
"epoch": 36.03,
"learning_rate": 0.0005488859065585351,
"loss": 1.275,
"step": 89600
},
{
"epoch": 36.07,
"learning_rate": 0.0005459825210816062,
"loss": 1.2742,
"step": 89700
},
{
"epoch": 36.11,
"learning_rate": 0.0005430846876835299,
"loss": 1.2769,
"step": 89800
},
{
"epoch": 36.15,
"learning_rate": 0.0005401924292174312,
"loss": 1.2783,
"step": 89900
},
{
"epoch": 36.19,
"learning_rate": 0.0005373346073161149,
"loss": 1.2779,
"step": 90000
},
{
"epoch": 36.23,
"learning_rate": 0.0005344535107797021,
"loss": 1.279,
"step": 90100
},
{
"epoch": 36.27,
"learning_rate": 0.0005315780572431434,
"loss": 1.2809,
"step": 90200
},
{
"epoch": 36.31,
"learning_rate": 0.0005287082693830698,
"loss": 1.2805,
"step": 90300
},
{
"epoch": 36.35,
"learning_rate": 0.0005258441698314319,
"loss": 1.28,
"step": 90400
},
{
"epoch": 36.39,
"learning_rate": 0.0005229857811753194,
"loss": 1.2804,
"step": 90500
},
{
"epoch": 36.43,
"learning_rate": 0.0005201331259567862,
"loss": 1.2817,
"step": 90600
},
{
"epoch": 36.47,
"learning_rate": 0.0005172862266726693,
"loss": 1.2787,
"step": 90700
},
{
"epoch": 36.51,
"learning_rate": 0.000514445105774413,
"loss": 1.2773,
"step": 90800
},
{
"epoch": 36.55,
"learning_rate": 0.0005116097856678919,
"loss": 1.2788,
"step": 90900
},
{
"epoch": 36.59,
"learning_rate": 0.0005087802887132336,
"loss": 1.2787,
"step": 91000
},
{
"epoch": 36.63,
"learning_rate": 0.000505956637224643,
"loss": 1.2777,
"step": 91100
},
{
"epoch": 36.67,
"learning_rate": 0.0005031388534702259,
"loss": 1.2796,
"step": 91200
},
{
"epoch": 36.71,
"learning_rate": 0.0005003269596718136,
"loss": 1.2787,
"step": 91300
},
{
"epoch": 36.75,
"learning_rate": 0.0004975209780047875,
"loss": 1.2829,
"step": 91400
},
{
"epoch": 36.8,
"learning_rate": 0.0004947209305979038,
"loss": 1.2794,
"step": 91500
},
{
"epoch": 36.84,
"learning_rate": 0.0004919268395331206,
"loss": 1.2799,
"step": 91600
},
{
"epoch": 36.88,
"learning_rate": 0.0004891387268454217,
"loss": 1.2785,
"step": 91700
},
{
"epoch": 36.92,
"learning_rate": 0.0004863566145226439,
"loss": 1.2797,
"step": 91800
},
{
"epoch": 36.96,
"learning_rate": 0.0004835805245053036,
"loss": 1.2783,
"step": 91900
},
{
"epoch": 37.0,
"learning_rate": 0.00048081047868642353,
"loss": 1.2794,
"step": 92000
},
{
"epoch": 37.0,
"eval_accuracy": 0.41857916552198776,
"eval_loss": 1.2428914308547974,
"eval_runtime": 19.1983,
"eval_samples_per_second": 4144.524,
"eval_steps_per_second": 16.199,
"step": 92009
},
{
"epoch": 37.04,
"learning_rate": 0.0004780741086105822,
"loss": 1.266,
"step": 92100
},
{
"epoch": 37.08,
"learning_rate": 0.0004753161556907,
"loss": 1.2675,
"step": 92200
},
{
"epoch": 37.12,
"learning_rate": 0.0004725643121444032,
"loss": 1.2649,
"step": 92300
},
{
"epoch": 37.16,
"learning_rate": 0.0004698185996734999,
"loss": 1.2662,
"step": 92400
},
{
"epoch": 37.2,
"learning_rate": 0.000467079039931447,
"loss": 1.2685,
"step": 92500
},
{
"epoch": 37.24,
"learning_rate": 0.0004643456545231797,
"loss": 1.2715,
"step": 92600
},
{
"epoch": 37.28,
"learning_rate": 0.00046161846500494027,
"loss": 1.2721,
"step": 92700
},
{
"epoch": 37.32,
"learning_rate": 0.00045889749288410803,
"loss": 1.2727,
"step": 92800
},
{
"epoch": 37.36,
"learning_rate": 0.0004561827596190307,
"loss": 1.2707,
"step": 92900
},
{
"epoch": 37.4,
"learning_rate": 0.0004534742866188546,
"loss": 1.2698,
"step": 93000
},
{
"epoch": 37.44,
"learning_rate": 0.00045079908599305484,
"loss": 1.2708,
"step": 93100
},
{
"epoch": 37.48,
"learning_rate": 0.00044810313441779866,
"loss": 1.275,
"step": 93200
},
{
"epoch": 37.52,
"learning_rate": 0.000445413506825629,
"loss": 1.2721,
"step": 93300
},
{
"epoch": 37.56,
"learning_rate": 0.00044273022442770137,
"loss": 1.2709,
"step": 93400
},
{
"epoch": 37.6,
"learning_rate": 0.00044005330838513225,
"loss": 1.2693,
"step": 93500
},
{
"epoch": 37.64,
"learning_rate": 0.0004373827798088306,
"loss": 1.2706,
"step": 93600
},
{
"epoch": 37.68,
"learning_rate": 0.0004347186597593318,
"loss": 1.27,
"step": 93700
},
{
"epoch": 37.72,
"learning_rate": 0.00043206096924663223,
"loss": 1.2732,
"step": 93800
},
{
"epoch": 37.76,
"learning_rate": 0.00042940972923002295,
"loss": 1.2717,
"step": 93900
},
{
"epoch": 37.8,
"learning_rate": 0.00042676496061792467,
"loss": 1.2724,
"step": 94000
},
{
"epoch": 37.84,
"learning_rate": 0.00042412668426772287,
"loss": 1.2709,
"step": 94100
},
{
"epoch": 37.88,
"learning_rate": 0.0004214949209856031,
"loss": 1.2743,
"step": 94200
},
{
"epoch": 37.92,
"learning_rate": 0.00041886969152638703,
"loss": 1.2685,
"step": 94300
},
{
"epoch": 37.96,
"learning_rate": 0.00041625101659336836,
"loss": 1.2687,
"step": 94400
},
{
"epoch": 38.0,
"eval_accuracy": 0.4191567563434093,
"eval_loss": 1.2355095148086548,
"eval_runtime": 18.0561,
"eval_samples_per_second": 4406.712,
"eval_steps_per_second": 17.224,
"step": 94496
},
{
"epoch": 38.0,
"learning_rate": 0.00041363891683815143,
"loss": 1.2674,
"step": 94500
},
{
"epoch": 38.04,
"learning_rate": 0.0004110334128604853,
"loss": 1.2529,
"step": 94600
},
{
"epoch": 38.08,
"learning_rate": 0.0004084345252081037,
"loss": 1.2597,
"step": 94700
},
{
"epoch": 38.12,
"learning_rate": 0.0004058422743765618,
"loss": 1.255,
"step": 94800
},
{
"epoch": 38.16,
"learning_rate": 0.00040325668080907486,
"loss": 1.2609,
"step": 94900
},
{
"epoch": 38.2,
"learning_rate": 0.0004006777648963576,
"loss": 1.2601,
"step": 95000
},
{
"epoch": 38.24,
"learning_rate": 0.0003981055469764626,
"loss": 1.2598,
"step": 95100
},
{
"epoch": 38.28,
"learning_rate": 0.00039554004733462005,
"loss": 1.2605,
"step": 95200
},
{
"epoch": 38.32,
"learning_rate": 0.0003929812862030781,
"loss": 1.2655,
"step": 95300
},
{
"epoch": 38.36,
"learning_rate": 0.0003904292837609427,
"loss": 1.2597,
"step": 95400
},
{
"epoch": 38.4,
"learning_rate": 0.00038788406013402,
"loss": 1.2629,
"step": 95500
},
{
"epoch": 38.44,
"learning_rate": 0.0003853456353946555,
"loss": 1.2638,
"step": 95600
},
{
"epoch": 38.48,
"learning_rate": 0.00038283931180072185,
"loss": 1.2636,
"step": 95700
},
{
"epoch": 38.52,
"learning_rate": 0.00038031447635151957,
"loss": 1.2616,
"step": 95800
},
{
"epoch": 38.56,
"learning_rate": 0.0003777964994857314,
"loss": 1.2616,
"step": 95900
},
{
"epoch": 38.6,
"learning_rate": 0.0003752854010608274,
"loss": 1.2649,
"step": 96000
},
{
"epoch": 38.64,
"learning_rate": 0.0003727812008800327,
"loss": 1.265,
"step": 96100
},
{
"epoch": 38.69,
"learning_rate": 0.0003702839186921707,
"loss": 1.261,
"step": 96200
},
{
"epoch": 38.73,
"learning_rate": 0.00036779357419150716,
"loss": 1.2632,
"step": 96300
},
{
"epoch": 38.77,
"learning_rate": 0.0003653101870175964,
"loss": 1.2623,
"step": 96400
},
{
"epoch": 38.81,
"learning_rate": 0.00036283377675512416,
"loss": 1.2614,
"step": 96500
},
{
"epoch": 38.85,
"learning_rate": 0.0003603643629337551,
"loss": 1.2626,
"step": 96600
},
{
"epoch": 38.89,
"learning_rate": 0.00035790196502797744,
"loss": 1.2615,
"step": 96700
},
{
"epoch": 38.93,
"learning_rate": 0.00035544660245695014,
"loss": 1.2628,
"step": 96800
},
{
"epoch": 38.97,
"learning_rate": 0.00035299829458434963,
"loss": 1.2632,
"step": 96900
},
{
"epoch": 39.0,
"eval_accuracy": 0.4198839224771513,
"eval_loss": 1.2276296615600586,
"eval_runtime": 17.963,
"eval_samples_per_second": 4429.538,
"eval_steps_per_second": 17.313,
"step": 96983
},
{
"epoch": 39.01,
"learning_rate": 0.00035055706071821695,
"loss": 1.2591,
"step": 97000
},
{
"epoch": 39.05,
"learning_rate": 0.00034812292011080534,
"loss": 1.2502,
"step": 97100
},
{
"epoch": 39.09,
"learning_rate": 0.0003456958919584291,
"loss": 1.249,
"step": 97200
},
{
"epoch": 39.13,
"learning_rate": 0.0003432759954013112,
"loss": 1.2494,
"step": 97300
},
{
"epoch": 39.17,
"learning_rate": 0.00034086324952343274,
"loss": 1.2543,
"step": 97400
},
{
"epoch": 39.21,
"learning_rate": 0.00033845767335238363,
"loss": 1.2507,
"step": 97500
},
{
"epoch": 39.25,
"learning_rate": 0.0003360592858592104,
"loss": 1.2529,
"step": 97600
},
{
"epoch": 39.29,
"learning_rate": 0.00033366810595826816,
"loss": 1.2519,
"step": 97700
},
{
"epoch": 39.33,
"learning_rate": 0.0003312841525070705,
"loss": 1.2511,
"step": 97800
},
{
"epoch": 39.37,
"learning_rate": 0.00032890744430614155,
"loss": 1.254,
"step": 97900
},
{
"epoch": 39.41,
"learning_rate": 0.0003265380000988674,
"loss": 1.2547,
"step": 98000
},
{
"epoch": 39.45,
"learning_rate": 0.00032417583857134795,
"loss": 1.253,
"step": 98100
},
{
"epoch": 39.49,
"learning_rate": 0.00032182097835225024,
"loss": 1.2491,
"step": 98200
},
{
"epoch": 39.53,
"learning_rate": 0.0003194734380126607,
"loss": 1.2557,
"step": 98300
},
{
"epoch": 39.57,
"learning_rate": 0.00031713323606593914,
"loss": 1.2547,
"step": 98400
},
{
"epoch": 39.61,
"learning_rate": 0.00031480039096757397,
"loss": 1.2565,
"step": 98500
},
{
"epoch": 39.65,
"learning_rate": 0.00031247492111503367,
"loss": 1.253,
"step": 98600
},
{
"epoch": 39.69,
"learning_rate": 0.00031015684484762455,
"loss": 1.2535,
"step": 98700
},
{
"epoch": 39.73,
"learning_rate": 0.00030784618044634463,
"loss": 1.253,
"step": 98800
},
{
"epoch": 39.77,
"learning_rate": 0.00030554294613374016,
"loss": 1.2542,
"step": 98900
},
{
"epoch": 39.81,
"learning_rate": 0.0003032471600737613,
"loss": 1.2526,
"step": 99000
},
{
"epoch": 39.85,
"learning_rate": 0.0003009588403716193,
"loss": 1.2552,
"step": 99100
},
{
"epoch": 39.89,
"learning_rate": 0.000298678005073644,
"loss": 1.2552,
"step": 99200
},
{
"epoch": 39.93,
"learning_rate": 0.00029640467216714053,
"loss": 1.2537,
"step": 99300
},
{
"epoch": 39.97,
"learning_rate": 0.0002941388595802483,
"loss": 1.2517,
"step": 99400
},
{
"epoch": 40.0,
"eval_accuracy": 0.42036451061081953,
"eval_loss": 1.2210617065429688,
"eval_runtime": 18.2161,
"eval_samples_per_second": 4367.993,
"eval_steps_per_second": 17.073,
"step": 99470
},
{
"epoch": 40.01,
"learning_rate": 0.0002918805851817999,
"loss": 1.2484,
"step": 99500
},
{
"epoch": 40.05,
"learning_rate": 0.0002896298667811789,
"loss": 1.2386,
"step": 99600
},
{
"epoch": 40.09,
"learning_rate": 0.00028738672212818065,
"loss": 1.2427,
"step": 99700
},
{
"epoch": 40.13,
"learning_rate": 0.0002851734868094691,
"loss": 1.2423,
"step": 99800
},
{
"epoch": 40.17,
"learning_rate": 0.00028294546648429563,
"loss": 1.2407,
"step": 99900
},
{
"epoch": 40.21,
"learning_rate": 0.00028072507262179606,
"loss": 1.2437,
"step": 100000
},
{
"epoch": 40.25,
"learning_rate": 0.00027851232273261794,
"loss": 1.2416,
"step": 100100
},
{
"epoch": 40.29,
"learning_rate": 0.00027630723426712566,
"loss": 1.2436,
"step": 100200
},
{
"epoch": 40.33,
"learning_rate": 0.000274109824615264,
"loss": 1.2457,
"step": 100300
},
{
"epoch": 40.37,
"learning_rate": 0.000271920111106421,
"loss": 1.2445,
"step": 100400
},
{
"epoch": 40.41,
"learning_rate": 0.0002697381110092896,
"loss": 1.2466,
"step": 100500
},
{
"epoch": 40.45,
"learning_rate": 0.0002675638415317336,
"loss": 1.2449,
"step": 100600
},
{
"epoch": 40.49,
"learning_rate": 0.0002653973198206501,
"loss": 1.2427,
"step": 100700
},
{
"epoch": 40.53,
"learning_rate": 0.00026323856296183584,
"loss": 1.2446,
"step": 100800
},
{
"epoch": 40.58,
"learning_rate": 0.0002610875879798516,
"loss": 1.2442,
"step": 100900
},
{
"epoch": 40.62,
"learning_rate": 0.0002589444118378878,
"loss": 1.2457,
"step": 101000
},
{
"epoch": 40.66,
"learning_rate": 0.0002568090514376313,
"loss": 1.2462,
"step": 101100
},
{
"epoch": 40.7,
"learning_rate": 0.00025468152361913203,
"loss": 1.245,
"step": 101200
},
{
"epoch": 40.74,
"learning_rate": 0.000252561845160669,
"loss": 1.2446,
"step": 101300
},
{
"epoch": 40.78,
"learning_rate": 0.0002504500327786208,
"loss": 1.244,
"step": 101400
},
{
"epoch": 40.82,
"learning_rate": 0.0002483461031273301,
"loss": 1.2472,
"step": 101500
},
{
"epoch": 40.86,
"learning_rate": 0.0002462500727989753,
"loss": 1.247,
"step": 101600
},
{
"epoch": 40.9,
"learning_rate": 0.00024416195832343765,
"loss": 1.2435,
"step": 101700
},
{
"epoch": 40.94,
"learning_rate": 0.00024208177616817276,
"loss": 1.2433,
"step": 101800
},
{
"epoch": 40.98,
"learning_rate": 0.00024003022567247874,
"loss": 1.244,
"step": 101900
},
{
"epoch": 41.0,
"eval_accuracy": 0.4209702463726659,
"eval_loss": 1.2142891883850098,
"eval_runtime": 18.2831,
"eval_samples_per_second": 4351.989,
"eval_steps_per_second": 17.01,
"step": 101956
},
{
"epoch": 41.02,
"learning_rate": 0.00023796587757840703,
"loss": 1.2387,
"step": 102000
},
{
"epoch": 41.06,
"learning_rate": 0.0002359095106686346,
"loss": 1.2334,
"step": 102100
},
{
"epoch": 41.1,
"learning_rate": 0.00023386114116024593,
"loss": 1.233,
"step": 102200
},
{
"epoch": 41.14,
"learning_rate": 0.00023182078520725695,
"loss": 1.2353,
"step": 102300
},
{
"epoch": 41.18,
"learning_rate": 0.0002297884589004852,
"loss": 1.2358,
"step": 102400
},
{
"epoch": 41.22,
"learning_rate": 0.0002277641782674246,
"loss": 1.2354,
"step": 102500
},
{
"epoch": 41.26,
"learning_rate": 0.000225747959272119,
"loss": 1.2359,
"step": 102600
},
{
"epoch": 41.3,
"learning_rate": 0.00022373981781503545,
"loss": 1.2341,
"step": 102700
},
{
"epoch": 41.34,
"learning_rate": 0.00022173976973293943,
"loss": 1.2361,
"step": 102800
},
{
"epoch": 41.38,
"learning_rate": 0.0002197478307987698,
"loss": 1.2394,
"step": 102900
},
{
"epoch": 41.42,
"learning_rate": 0.00021776401672151432,
"loss": 1.2384,
"step": 103000
},
{
"epoch": 41.46,
"learning_rate": 0.00021578834314608638,
"loss": 1.2345,
"step": 103100
},
{
"epoch": 41.5,
"learning_rate": 0.0002138208256532001,
"loss": 1.2345,
"step": 103200
},
{
"epoch": 41.54,
"learning_rate": 0.00021186147975924913,
"loss": 1.2368,
"step": 103300
},
{
"epoch": 41.58,
"learning_rate": 0.000209910320916184,
"loss": 1.237,
"step": 103400
},
{
"epoch": 41.62,
"learning_rate": 0.00020796736451138902,
"loss": 1.2365,
"step": 103500
},
{
"epoch": 41.66,
"learning_rate": 0.0002060326258675621,
"loss": 1.237,
"step": 103600
},
{
"epoch": 41.7,
"learning_rate": 0.00020410612024259367,
"loss": 1.2337,
"step": 103700
},
{
"epoch": 41.74,
"learning_rate": 0.00020218786282944629,
"loss": 1.2346,
"step": 103800
},
{
"epoch": 41.78,
"learning_rate": 0.00020029692774372416,
"loss": 1.2401,
"step": 103900
},
{
"epoch": 41.82,
"learning_rate": 0.00019839512921442467,
"loss": 1.2351,
"step": 104000
},
{
"epoch": 41.86,
"learning_rate": 0.0001965016239354195,
"loss": 1.2362,
"step": 104100
},
{
"epoch": 41.9,
"learning_rate": 0.00019461642683942098,
"loss": 1.2342,
"step": 104200
},
{
"epoch": 41.94,
"learning_rate": 0.00019273955279362155,
"loss": 1.2378,
"step": 104300
},
{
"epoch": 41.98,
"learning_rate": 0.00019087101659957472,
"loss": 1.2342,
"step": 104400
},
{
"epoch": 42.0,
"eval_accuracy": 0.4214678276779114,
"eval_loss": 1.206972599029541,
"eval_runtime": 23.1456,
"eval_samples_per_second": 3437.717,
"eval_steps_per_second": 13.437,
"step": 104443
},
{
"epoch": 42.02,
"learning_rate": 0.0001890108329930798,
"loss": 1.2246,
"step": 104500
},
{
"epoch": 42.06,
"learning_rate": 0.00018715901664406552,
"loss": 1.2236,
"step": 104600
},
{
"epoch": 42.1,
"learning_rate": 0.00018533397496334577,
"loss": 1.2253,
"step": 104700
},
{
"epoch": 42.14,
"learning_rate": 0.00018349885283927892,
"loss": 1.2263,
"step": 104800
},
{
"epoch": 42.18,
"learning_rate": 0.00018167214144171039,
"loss": 1.226,
"step": 104900
},
{
"epoch": 42.22,
"learning_rate": 0.0001798538551765977,
"loss": 1.2303,
"step": 105000
},
{
"epoch": 42.26,
"learning_rate": 0.00017804400838345501,
"loss": 1.2247,
"step": 105100
},
{
"epoch": 42.3,
"learning_rate": 0.0001762426153352409,
"loss": 1.225,
"step": 105200
},
{
"epoch": 42.34,
"learning_rate": 0.00017444969023824462,
"loss": 1.2298,
"step": 105300
},
{
"epoch": 42.38,
"learning_rate": 0.0001726652472319755,
"loss": 1.2271,
"step": 105400
},
{
"epoch": 42.42,
"learning_rate": 0.00017088930038905006,
"loss": 1.2284,
"step": 105500
},
{
"epoch": 42.47,
"learning_rate": 0.00016912186371508266,
"loss": 1.2296,
"step": 105600
},
{
"epoch": 42.51,
"learning_rate": 0.00016736295114857318,
"loss": 1.2285,
"step": 105700
},
{
"epoch": 42.55,
"learning_rate": 0.00016561257656079852,
"loss": 1.2285,
"step": 105800
},
{
"epoch": 42.59,
"learning_rate": 0.0001638707537557023,
"loss": 1.2271,
"step": 105900
},
{
"epoch": 42.63,
"learning_rate": 0.00016213749646978664,
"loss": 1.2288,
"step": 106000
},
{
"epoch": 42.67,
"learning_rate": 0.0001604128183720037,
"loss": 1.229,
"step": 106100
},
{
"epoch": 42.71,
"learning_rate": 0.00015869673306364714,
"loss": 1.2282,
"step": 106200
},
{
"epoch": 42.75,
"learning_rate": 0.00015698925407824665,
"loss": 1.2256,
"step": 106300
},
{
"epoch": 42.79,
"learning_rate": 0.0001552903948814588,
"loss": 1.2279,
"step": 106400
},
{
"epoch": 42.83,
"learning_rate": 0.0001536001688709629,
"loss": 1.2291,
"step": 106500
},
{
"epoch": 42.87,
"learning_rate": 0.00015191858937635403,
"loss": 1.2299,
"step": 106600
},
{
"epoch": 42.91,
"learning_rate": 0.00015024566965903888,
"loss": 1.229,
"step": 106700
},
{
"epoch": 42.95,
"learning_rate": 0.00014858142291213032,
"loss": 1.2265,
"step": 106800
},
{
"epoch": 42.99,
"learning_rate": 0.00014692586226034408,
"loss": 1.2274,
"step": 106900
},
{
"epoch": 43.0,
"eval_accuracy": 0.42207267837873813,
"eval_loss": 1.200390100479126,
"eval_runtime": 18.2476,
"eval_samples_per_second": 4360.456,
"eval_steps_per_second": 17.043,
"step": 106930
},
{
"epoch": 43.03,
"learning_rate": 0.00014527900075989462,
"loss": 1.2189,
"step": 107000
},
{
"epoch": 43.07,
"learning_rate": 0.00014364085139839235,
"loss": 1.215,
"step": 107100
},
{
"epoch": 43.11,
"learning_rate": 0.0001420114270947423,
"loss": 1.2176,
"step": 107200
},
{
"epoch": 43.15,
"learning_rate": 0.00014039074069903984,
"loss": 1.2178,
"step": 107300
},
{
"epoch": 43.19,
"learning_rate": 0.00013877880499247182,
"loss": 1.2195,
"step": 107400
},
{
"epoch": 43.23,
"learning_rate": 0.00013717563268721394,
"loss": 1.2172,
"step": 107500
},
{
"epoch": 43.27,
"learning_rate": 0.00013558123642633203,
"loss": 1.2201,
"step": 107600
},
{
"epoch": 43.31,
"learning_rate": 0.00013399562878368092,
"loss": 1.2208,
"step": 107700
},
{
"epoch": 43.35,
"learning_rate": 0.0001324188222638062,
"loss": 1.2206,
"step": 107800
},
{
"epoch": 43.39,
"learning_rate": 0.00013085082930184538,
"loss": 1.2216,
"step": 107900
},
{
"epoch": 43.43,
"learning_rate": 0.00012929166226342993,
"loss": 1.2193,
"step": 108000
},
{
"epoch": 43.47,
"learning_rate": 0.00012774133344458705,
"loss": 1.2204,
"step": 108100
},
{
"epoch": 43.51,
"learning_rate": 0.0001261998550716438,
"loss": 1.2178,
"step": 108200
},
{
"epoch": 43.55,
"learning_rate": 0.00012466723930113028,
"loss": 1.22,
"step": 108300
},
{
"epoch": 43.59,
"learning_rate": 0.0001231434982196833,
"loss": 1.2202,
"step": 108400
},
{
"epoch": 43.63,
"learning_rate": 0.00012162864384395109,
"loss": 1.2178,
"step": 108500
},
{
"epoch": 43.67,
"learning_rate": 0.00012012268812049948,
"loss": 1.2205,
"step": 108600
},
{
"epoch": 43.71,
"learning_rate": 0.00011862564292571654,
"loss": 1.2218,
"step": 108700
},
{
"epoch": 43.75,
"learning_rate": 0.00011715235709016989,
"loss": 1.2213,
"step": 108800
},
{
"epoch": 43.79,
"learning_rate": 0.00011567307890214637,
"loss": 1.2187,
"step": 108900
},
{
"epoch": 43.83,
"learning_rate": 0.00011420274633365508,
"loss": 1.2207,
"step": 109000
},
{
"epoch": 43.87,
"learning_rate": 0.00011274137098015003,
"loss": 1.2219,
"step": 109100
},
{
"epoch": 43.91,
"learning_rate": 0.00011128896436644637,
"loss": 1.2187,
"step": 109200
},
{
"epoch": 43.95,
"learning_rate": 0.00010984553794662913,
"loss": 1.2246,
"step": 109300
},
{
"epoch": 43.99,
"learning_rate": 0.00010841110310396294,
"loss": 1.2195,
"step": 109400
},
{
"epoch": 44.0,
"eval_accuracy": 0.4225120226688909,
"eval_loss": 1.1945446729660034,
"eval_runtime": 18.2468,
"eval_samples_per_second": 4360.644,
"eval_steps_per_second": 17.044,
"step": 109417
},
{
"epoch": 44.03,
"learning_rate": 0.00010698567115080215,
"loss": 1.2138,
"step": 109500
},
{
"epoch": 44.07,
"learning_rate": 0.00010556925332850243,
"loss": 1.2119,
"step": 109600
},
{
"epoch": 44.11,
"learning_rate": 0.00010416186080733053,
"loss": 1.211,
"step": 109700
},
{
"epoch": 44.15,
"learning_rate": 0.0001027635046863784,
"loss": 1.2115,
"step": 109800
},
{
"epoch": 44.19,
"learning_rate": 0.00010137419599347344,
"loss": 1.2124,
"step": 109900
},
{
"epoch": 44.23,
"learning_rate": 9.999394568509276e-05,
"loss": 1.2094,
"step": 110000
},
{
"epoch": 44.27,
"learning_rate": 9.862276464627659e-05,
"loss": 1.2145,
"step": 110100
},
{
"epoch": 44.31,
"learning_rate": 9.72606636905421e-05,
"loss": 1.2153,
"step": 110200
},
{
"epoch": 44.36,
"learning_rate": 9.590765355979872e-05,
"loss": 1.2129,
"step": 110300
},
{
"epoch": 44.4,
"learning_rate": 9.456374492426326e-05,
"loss": 1.2112,
"step": 110400
},
{
"epoch": 44.44,
"learning_rate": 9.322894838237492e-05,
"loss": 1.2115,
"step": 110500
},
{
"epoch": 44.48,
"learning_rate": 9.190327446071303e-05,
"loss": 1.2111,
"step": 110600
},
{
"epoch": 44.52,
"learning_rate": 9.059985377950942e-05,
"loss": 1.2122,
"step": 110700
},
{
"epoch": 44.56,
"learning_rate": 8.929236490444886e-05,
"loss": 1.2102,
"step": 110800
},
{
"epoch": 44.6,
"learning_rate": 8.799402969461356e-05,
"loss": 1.2157,
"step": 110900
},
{
"epoch": 44.64,
"learning_rate": 8.670485838903813e-05,
"loss": 1.2117,
"step": 111000
},
{
"epoch": 44.68,
"learning_rate": 8.542486115448847e-05,
"loss": 1.2159,
"step": 111100
},
{
"epoch": 44.72,
"learning_rate": 8.415404808538113e-05,
"loss": 1.2135,
"step": 111200
},
{
"epoch": 44.76,
"learning_rate": 8.289242920370337e-05,
"loss": 1.2141,
"step": 111300
},
{
"epoch": 44.8,
"learning_rate": 8.164001445893466e-05,
"loss": 1.2145,
"step": 111400
},
{
"epoch": 44.84,
"learning_rate": 8.039681372796842e-05,
"loss": 1.2121,
"step": 111500
},
{
"epoch": 44.88,
"learning_rate": 7.916283681503323e-05,
"loss": 1.2131,
"step": 111600
},
{
"epoch": 44.92,
"learning_rate": 7.793809345161661e-05,
"loss": 1.2091,
"step": 111700
},
{
"epoch": 44.96,
"learning_rate": 7.67225932963872e-05,
"loss": 1.2121,
"step": 111800
},
{
"epoch": 45.0,
"learning_rate": 7.551634593511963e-05,
"loss": 1.2127,
"step": 111900
},
{
"epoch": 45.0,
"eval_accuracy": 0.4228867575046094,
"eval_loss": 1.1900668144226074,
"eval_runtime": 18.1455,
"eval_samples_per_second": 4385.002,
"eval_steps_per_second": 17.139,
"step": 111903
},
{
"epoch": 45.04,
"learning_rate": 7.43193608806178e-05,
"loss": 1.2036,
"step": 112000
},
{
"epoch": 45.08,
"learning_rate": 7.313164757264124e-05,
"loss": 1.2056,
"step": 112100
},
{
"epoch": 45.12,
"learning_rate": 7.19532153778292e-05,
"loss": 1.2025,
"step": 112200
},
{
"epoch": 45.16,
"learning_rate": 7.078407358962785e-05,
"loss": 1.2057,
"step": 112300
},
{
"epoch": 45.2,
"learning_rate": 6.962423142821653e-05,
"loss": 1.2062,
"step": 112400
},
{
"epoch": 45.24,
"learning_rate": 6.847369804043491e-05,
"loss": 1.2055,
"step": 112500
},
{
"epoch": 45.28,
"learning_rate": 6.733248249971153e-05,
"loss": 1.203,
"step": 112600
},
{
"epoch": 45.32,
"learning_rate": 6.620059380599119e-05,
"loss": 1.2062,
"step": 112700
},
{
"epoch": 45.36,
"learning_rate": 6.507804088566522e-05,
"loss": 1.2078,
"step": 112800
},
{
"epoch": 45.4,
"learning_rate": 6.396483259149974e-05,
"loss": 1.2036,
"step": 112900
},
{
"epoch": 45.44,
"learning_rate": 6.286097770256697e-05,
"loss": 1.2054,
"step": 113000
},
{
"epoch": 45.48,
"learning_rate": 6.176648492417572e-05,
"loss": 1.2043,
"step": 113100
},
{
"epoch": 45.52,
"learning_rate": 6.0681362887802236e-05,
"loss": 1.2082,
"step": 113200
},
{
"epoch": 45.56,
"learning_rate": 5.9605620151022416e-05,
"loss": 1.2087,
"step": 113300
},
{
"epoch": 45.6,
"learning_rate": 5.853926519744451e-05,
"loss": 1.2083,
"step": 113400
},
{
"epoch": 45.64,
"learning_rate": 5.748230643664226e-05,
"loss": 1.2055,
"step": 113500
},
{
"epoch": 45.68,
"learning_rate": 5.6434752204088115e-05,
"loss": 1.2066,
"step": 113600
},
{
"epoch": 45.72,
"learning_rate": 5.539661076108787e-05,
"loss": 1.2076,
"step": 113700
},
{
"epoch": 45.76,
"learning_rate": 5.436789029471562e-05,
"loss": 1.2076,
"step": 113800
},
{
"epoch": 45.8,
"learning_rate": 5.3348598917748456e-05,
"loss": 1.2115,
"step": 113900
},
{
"epoch": 45.84,
"learning_rate": 5.2348796471101136e-05,
"loss": 1.2063,
"step": 114000
},
{
"epoch": 45.88,
"learning_rate": 5.134829282367678e-05,
"loss": 1.2078,
"step": 114100
},
{
"epoch": 45.92,
"learning_rate": 5.035724207904865e-05,
"loss": 1.2081,
"step": 114200
},
{
"epoch": 45.96,
"learning_rate": 4.937565205291983e-05,
"loss": 1.2077,
"step": 114300
},
{
"epoch": 46.0,
"eval_accuracy": 0.42319670587368974,
"eval_loss": 1.1863625049591064,
"eval_runtime": 18.1292,
"eval_samples_per_second": 4388.94,
"eval_steps_per_second": 17.155,
"step": 114390
},
{
"epoch": 46.0,
"learning_rate": 4.840353048638432e-05,
"loss": 1.2057,
"step": 114400
},
{
"epoch": 46.04,
"learning_rate": 4.7440885045864775e-05,
"loss": 1.1987,
"step": 114500
},
{
"epoch": 46.08,
"learning_rate": 4.6487723323052344e-05,
"loss": 1.2002,
"step": 114600
},
{
"epoch": 46.12,
"learning_rate": 4.554405283484733e-05,
"loss": 1.1993,
"step": 114700
},
{
"epoch": 46.16,
"learning_rate": 4.4609881023299216e-05,
"loss": 1.1977,
"step": 114800
},
{
"epoch": 46.2,
"learning_rate": 4.3685215255548086e-05,
"loss": 1.2031,
"step": 114900
},
{
"epoch": 46.25,
"learning_rate": 4.277006282376719e-05,
"loss": 1.1998,
"step": 115000
},
{
"epoch": 46.29,
"learning_rate": 4.186443094510467e-05,
"loss": 1.2023,
"step": 115100
},
{
"epoch": 46.33,
"learning_rate": 4.096832676162704e-05,
"loss": 1.2014,
"step": 115200
},
{
"epoch": 46.37,
"learning_rate": 4.008175734026259e-05,
"loss": 1.2005,
"step": 115300
},
{
"epoch": 46.41,
"learning_rate": 3.920472967274588e-05,
"loss": 1.2036,
"step": 115400
},
{
"epoch": 46.45,
"learning_rate": 3.833725067556235e-05,
"loss": 1.2017,
"step": 115500
},
{
"epoch": 46.49,
"learning_rate": 3.747932718989433e-05,
"loss": 1.1997,
"step": 115600
},
{
"epoch": 46.53,
"learning_rate": 3.663096598156654e-05,
"loss": 1.2002,
"step": 115700
},
{
"epoch": 46.57,
"learning_rate": 3.579217374099275e-05,
"loss": 1.2012,
"step": 115800
},
{
"epoch": 46.61,
"learning_rate": 3.496295708312336e-05,
"loss": 1.2016,
"step": 115900
},
{
"epoch": 46.65,
"learning_rate": 3.4143322547392775e-05,
"loss": 1.2027,
"step": 116000
},
{
"epoch": 46.69,
"learning_rate": 3.3333276597668774e-05,
"loss": 1.1987,
"step": 116100
},
{
"epoch": 46.73,
"learning_rate": 3.253282562220003e-05,
"loss": 1.2016,
"step": 116200
},
{
"epoch": 46.77,
"learning_rate": 3.1741975933567006e-05,
"loss": 1.2037,
"step": 116300
},
{
"epoch": 46.81,
"learning_rate": 3.0960733768631707e-05,
"loss": 1.2031,
"step": 116400
},
{
"epoch": 46.85,
"learning_rate": 3.0189105288488535e-05,
"loss": 1.199,
"step": 116500
},
{
"epoch": 46.89,
"learning_rate": 2.942709657841547e-05,
"loss": 1.2021,
"step": 116600
},
{
"epoch": 46.93,
"learning_rate": 2.8674713647826452e-05,
"loss": 1.2008,
"step": 116700
},
{
"epoch": 46.97,
"learning_rate": 2.7931962430223916e-05,
"loss": 1.2008,
"step": 116800
},
{
"epoch": 47.0,
"eval_accuracy": 0.4234523114961638,
"eval_loss": 1.1835007667541504,
"eval_runtime": 18.0486,
"eval_samples_per_second": 4408.549,
"eval_steps_per_second": 17.231,
"step": 116877
},
{
"epoch": 47.01,
"learning_rate": 2.7206132194601552e-05,
"loss": 1.2004,
"step": 116900
},
{
"epoch": 47.05,
"learning_rate": 2.6482565437709605e-05,
"loss": 1.1965,
"step": 117000
},
{
"epoch": 47.09,
"learning_rate": 2.5768647681698314e-05,
"loss": 1.1938,
"step": 117100
},
{
"epoch": 47.13,
"learning_rate": 2.5064384556722914e-05,
"loss": 1.1968,
"step": 117200
},
{
"epoch": 47.17,
"learning_rate": 2.4369781616799536e-05,
"loss": 1.1974,
"step": 117300
},
{
"epoch": 47.21,
"learning_rate": 2.368484433976123e-05,
"loss": 1.1944,
"step": 117400
},
{
"epoch": 47.25,
"learning_rate": 2.3009578127215076e-05,
"loss": 1.1977,
"step": 117500
},
{
"epoch": 47.29,
"learning_rate": 2.2343988304499436e-05,
"loss": 1.1989,
"step": 117600
},
{
"epoch": 47.33,
"learning_rate": 2.168808012064205e-05,
"loss": 1.1966,
"step": 117700
},
{
"epoch": 47.37,
"learning_rate": 2.1041858748318126e-05,
"loss": 1.1972,
"step": 117800
},
{
"epoch": 47.41,
"learning_rate": 2.0405329283810226e-05,
"loss": 1.1981,
"step": 117900
},
{
"epoch": 47.45,
"learning_rate": 1.977849674696816e-05,
"loss": 1.1981,
"step": 118000
},
{
"epoch": 47.49,
"learning_rate": 1.9161366081168612e-05,
"loss": 1.1979,
"step": 118100
},
{
"epoch": 47.53,
"learning_rate": 1.8553942153276548e-05,
"loss": 1.1966,
"step": 118200
},
{
"epoch": 47.57,
"learning_rate": 1.7956229753607333e-05,
"loss": 1.2016,
"step": 118300
},
{
"epoch": 47.61,
"learning_rate": 1.736823359588788e-05,
"loss": 1.1971,
"step": 118400
},
{
"epoch": 47.65,
"learning_rate": 1.678995831722055e-05,
"loss": 1.1989,
"step": 118500
},
{
"epoch": 47.69,
"learning_rate": 1.6221408478046123e-05,
"loss": 1.1986,
"step": 118600
},
{
"epoch": 47.73,
"learning_rate": 1.5662588562107698e-05,
"loss": 1.1963,
"step": 118700
},
{
"epoch": 47.77,
"learning_rate": 1.511350297641545e-05,
"loss": 1.1946,
"step": 118800
},
{
"epoch": 47.81,
"learning_rate": 1.4574156051211929e-05,
"loss": 1.1993,
"step": 118900
},
{
"epoch": 47.85,
"learning_rate": 1.4044552039938069e-05,
"loss": 1.1962,
"step": 119000
},
{
"epoch": 47.89,
"learning_rate": 1.3524695119199182e-05,
"loss": 1.1942,
"step": 119100
},
{
"epoch": 47.93,
"learning_rate": 1.3014589388732484e-05,
"loss": 1.1956,
"step": 119200
},
{
"epoch": 47.97,
"learning_rate": 1.2514238871374623e-05,
"loss": 1.1957,
"step": 119300
},
{
"epoch": 48.0,
"eval_accuracy": 0.42356701540431,
"eval_loss": 1.1818615198135376,
"eval_runtime": 18.2384,
"eval_samples_per_second": 4362.669,
"eval_steps_per_second": 17.052,
"step": 119364
},
{
"epoch": 48.01,
"learning_rate": 1.2023647513029479e-05,
"loss": 1.1975,
"step": 119400
},
{
"epoch": 48.05,
"learning_rate": 1.1542819182638053e-05,
"loss": 1.1943,
"step": 119500
},
{
"epoch": 48.09,
"learning_rate": 1.1071757672147104e-05,
"loss": 1.1947,
"step": 119600
},
{
"epoch": 48.14,
"learning_rate": 1.0610466696479442e-05,
"loss": 1.1944,
"step": 119700
},
{
"epoch": 48.18,
"learning_rate": 1.01589498935048e-05,
"loss": 1.1939,
"step": 119800
},
{
"epoch": 48.22,
"learning_rate": 9.717210824011086e-06,
"loss": 1.1924,
"step": 119900
},
{
"epoch": 48.26,
"learning_rate": 9.285252971676233e-06,
"loss": 1.1939,
"step": 120000
},
{
"epoch": 48.3,
"learning_rate": 8.86307974304057e-06,
"loss": 1.1958,
"step": 120100
},
{
"epoch": 48.34,
"learning_rate": 8.450694467480591e-06,
"loss": 1.1931,
"step": 120200
},
{
"epoch": 48.38,
"learning_rate": 8.048100397181762e-06,
"loss": 1.1943,
"step": 120300
},
{
"epoch": 48.42,
"learning_rate": 7.655300707113676e-06,
"loss": 1.1952,
"step": 120400
},
{
"epoch": 48.46,
"learning_rate": 7.272298495004659e-06,
"loss": 1.1925,
"step": 120500
},
{
"epoch": 48.5,
"learning_rate": 6.899096781317199e-06,
"loss": 1.1928,
"step": 120600
},
{
"epoch": 48.54,
"learning_rate": 6.535698509224641e-06,
"loss": 1.1952,
"step": 120700
},
{
"epoch": 48.58,
"learning_rate": 6.182106544587313e-06,
"loss": 1.1951,
"step": 120800
},
{
"epoch": 48.62,
"learning_rate": 5.838323675930318e-06,
"loss": 1.1945,
"step": 120900
},
{
"epoch": 48.66,
"learning_rate": 5.507643747879182e-06,
"loss": 1.194,
"step": 121000
},
{
"epoch": 48.7,
"learning_rate": 5.1833889701156935e-06,
"loss": 1.1933,
"step": 121100
},
{
"epoch": 48.74,
"learning_rate": 4.868951164498925e-06,
"loss": 1.1949,
"step": 121200
},
{
"epoch": 48.78,
"learning_rate": 4.564332810773342e-06,
"loss": 1.1965,
"step": 121300
},
{
"epoch": 48.82,
"learning_rate": 4.269536311244659e-06,
"loss": 1.1954,
"step": 121400
},
{
"epoch": 48.86,
"learning_rate": 3.9845639907604145e-06,
"loss": 1.1923,
"step": 121500
},
{
"epoch": 48.9,
"learning_rate": 3.709418096692202e-06,
"loss": 1.1934,
"step": 121600
},
{
"epoch": 48.94,
"learning_rate": 3.4441007989174933e-06,
"loss": 1.1964,
"step": 121700
},
{
"epoch": 48.98,
"learning_rate": 3.1886141898027077e-06,
"loss": 1.1931,
"step": 121800
},
{
"epoch": 49.0,
"eval_accuracy": 0.4236181719312456,
"eval_loss": 1.1811745166778564,
"eval_runtime": 18.2028,
"eval_samples_per_second": 4371.185,
"eval_steps_per_second": 17.085,
"step": 121850
},
{
"epoch": 49.02,
"learning_rate": 2.942960284186835e-06,
"loss": 1.1948,
"step": 121900
},
{
"epoch": 49.06,
"learning_rate": 2.7071410193652e-06,
"loss": 1.1911,
"step": 122000
},
{
"epoch": 49.1,
"learning_rate": 2.4811582550746113e-06,
"loss": 1.1927,
"step": 122100
},
{
"epoch": 49.14,
"learning_rate": 2.2671265131336828e-06,
"loss": 1.193,
"step": 122200
},
{
"epoch": 49.18,
"learning_rate": 2.06072361075213e-06,
"loss": 1.1949,
"step": 122300
},
{
"epoch": 49.22,
"learning_rate": 1.8641623067300329e-06,
"loss": 1.1902,
"step": 122400
},
{
"epoch": 49.26,
"learning_rate": 1.6774441512050487e-06,
"loss": 1.1927,
"step": 122500
},
{
"epoch": 49.3,
"learning_rate": 1.5005706166887346e-06,
"loss": 1.1923,
"step": 122600
},
{
"epoch": 49.34,
"learning_rate": 1.3335430980553065e-06,
"loss": 1.1914,
"step": 122700
},
{
"epoch": 49.38,
"learning_rate": 1.1763629125303987e-06,
"loss": 1.1914,
"step": 122800
},
{
"epoch": 49.42,
"learning_rate": 1.0290312996810714e-06,
"loss": 1.1934,
"step": 122900
},
{
"epoch": 49.46,
"learning_rate": 8.915494214056796e-07,
"loss": 1.192,
"step": 123000
},
{
"epoch": 49.5,
"learning_rate": 7.639183619247148e-07,
"loss": 1.1915,
"step": 123100
},
{
"epoch": 49.54,
"learning_rate": 6.461391277723383e-07,
"loss": 1.1949,
"step": 123200
},
{
"epoch": 49.58,
"learning_rate": 5.382126477887495e-07,
"loss": 1.1945,
"step": 123300
},
{
"epoch": 49.62,
"learning_rate": 4.4013977311213593e-07,
"loss": 1.1929,
"step": 123400
},
{
"epoch": 49.66,
"learning_rate": 3.519212771728453e-07,
"loss": 1.1936,
"step": 123500
},
{
"epoch": 49.7,
"learning_rate": 2.7355785568658476e-07,
"loss": 1.1941,
"step": 123600
},
{
"epoch": 49.74,
"learning_rate": 2.050501266491478e-07,
"loss": 1.1922,
"step": 123700
},
{
"epoch": 49.78,
"learning_rate": 1.4639863033169553e-07,
"loss": 1.1914,
"step": 123800
},
{
"epoch": 49.82,
"learning_rate": 9.760382927631594e-08,
"loss": 1.1921,
"step": 123900
},
{
"epoch": 49.86,
"learning_rate": 5.8666108292554366e-08,
"loss": 1.1939,
"step": 124000
},
{
"epoch": 49.9,
"learning_rate": 2.958577445394406e-08,
"loss": 1.193,
"step": 124100
},
{
"epoch": 49.94,
"learning_rate": 1.0363057096479666e-08,
"loss": 1.1924,
"step": 124200
},
{
"epoch": 49.98,
"learning_rate": 9.981078157028645e-10,
"loss": 1.1951,
"step": 124300
},
{
"epoch": 49.98,
"eval_accuracy": 0.4236413605299604,
"eval_loss": 1.1810568571090698,
"eval_runtime": 18.1953,
"eval_samples_per_second": 4373.003,
"eval_steps_per_second": 17.092,
"step": 124300
},
{
"epoch": 49.98,
"step": 124300,
"total_flos": 6.182289190910362e+16,
"train_loss": 1.379773639240096,
"train_runtime": 17824.9007,
"train_samples_per_second": 3571.139,
"train_steps_per_second": 6.973
}
],
"logging_steps": 100,
"max_steps": 124300,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 6.182289190910362e+16,
"trial_name": null,
"trial_params": null
}