batterydata's picture
update
8b8bd0c
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 14.0,
"global_step": 928746,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 2.9984925910851835e-05,
"loss": 0.1246,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 2.9969851821703676e-05,
"loss": 0.0954,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 2.995477773255551e-05,
"loss": 0.0904,
"step": 1500
},
{
"epoch": 0.03,
"learning_rate": 2.9939703643407347e-05,
"loss": 0.0884,
"step": 2000
},
{
"epoch": 0.04,
"learning_rate": 2.9924629554259184e-05,
"loss": 0.0811,
"step": 2500
},
{
"epoch": 0.05,
"learning_rate": 2.9909555465111022e-05,
"loss": 0.0836,
"step": 3000
},
{
"epoch": 0.05,
"learning_rate": 2.9894481375962856e-05,
"loss": 0.081,
"step": 3500
},
{
"epoch": 0.06,
"learning_rate": 2.9879407286814693e-05,
"loss": 0.0775,
"step": 4000
},
{
"epoch": 0.07,
"learning_rate": 2.986433319766653e-05,
"loss": 0.0838,
"step": 4500
},
{
"epoch": 0.08,
"learning_rate": 2.9849259108518368e-05,
"loss": 0.0793,
"step": 5000
},
{
"epoch": 0.08,
"learning_rate": 2.9834185019370202e-05,
"loss": 0.0796,
"step": 5500
},
{
"epoch": 0.09,
"learning_rate": 2.9819110930222043e-05,
"loss": 0.0796,
"step": 6000
},
{
"epoch": 0.1,
"learning_rate": 2.9804036841073877e-05,
"loss": 0.0757,
"step": 6500
},
{
"epoch": 0.11,
"learning_rate": 2.9788962751925715e-05,
"loss": 0.076,
"step": 7000
},
{
"epoch": 0.11,
"learning_rate": 2.9773888662777552e-05,
"loss": 0.0754,
"step": 7500
},
{
"epoch": 0.12,
"learning_rate": 2.975881457362939e-05,
"loss": 0.0745,
"step": 8000
},
{
"epoch": 0.13,
"learning_rate": 2.9743740484481223e-05,
"loss": 0.0748,
"step": 8500
},
{
"epoch": 0.14,
"learning_rate": 2.9728666395333064e-05,
"loss": 0.0735,
"step": 9000
},
{
"epoch": 0.14,
"learning_rate": 2.9713592306184898e-05,
"loss": 0.0739,
"step": 9500
},
{
"epoch": 0.15,
"learning_rate": 2.9698518217036736e-05,
"loss": 0.074,
"step": 10000
},
{
"epoch": 0.16,
"learning_rate": 2.9683444127888573e-05,
"loss": 0.0725,
"step": 10500
},
{
"epoch": 0.17,
"learning_rate": 2.966837003874041e-05,
"loss": 0.0744,
"step": 11000
},
{
"epoch": 0.17,
"learning_rate": 2.9653295949592245e-05,
"loss": 0.0751,
"step": 11500
},
{
"epoch": 0.18,
"learning_rate": 2.9638221860444082e-05,
"loss": 0.0714,
"step": 12000
},
{
"epoch": 0.19,
"learning_rate": 2.962314777129592e-05,
"loss": 0.0726,
"step": 12500
},
{
"epoch": 0.2,
"learning_rate": 2.9608073682147757e-05,
"loss": 0.0711,
"step": 13000
},
{
"epoch": 0.2,
"learning_rate": 2.959299959299959e-05,
"loss": 0.0716,
"step": 13500
},
{
"epoch": 0.21,
"learning_rate": 2.957792550385143e-05,
"loss": 0.0713,
"step": 14000
},
{
"epoch": 0.22,
"learning_rate": 2.9562851414703266e-05,
"loss": 0.07,
"step": 14500
},
{
"epoch": 0.23,
"learning_rate": 2.9547777325555103e-05,
"loss": 0.0688,
"step": 15000
},
{
"epoch": 0.23,
"learning_rate": 2.953270323640694e-05,
"loss": 0.0691,
"step": 15500
},
{
"epoch": 0.24,
"learning_rate": 2.9517629147258778e-05,
"loss": 0.0693,
"step": 16000
},
{
"epoch": 0.25,
"learning_rate": 2.9502555058110612e-05,
"loss": 0.0685,
"step": 16500
},
{
"epoch": 0.26,
"learning_rate": 2.9487480968962453e-05,
"loss": 0.0686,
"step": 17000
},
{
"epoch": 0.26,
"learning_rate": 2.9472406879814287e-05,
"loss": 0.068,
"step": 17500
},
{
"epoch": 0.27,
"learning_rate": 2.9457332790666124e-05,
"loss": 0.0712,
"step": 18000
},
{
"epoch": 0.28,
"learning_rate": 2.944225870151796e-05,
"loss": 0.0683,
"step": 18500
},
{
"epoch": 0.29,
"learning_rate": 2.94271846123698e-05,
"loss": 0.067,
"step": 19000
},
{
"epoch": 0.29,
"learning_rate": 2.9412110523221633e-05,
"loss": 0.0663,
"step": 19500
},
{
"epoch": 0.3,
"learning_rate": 2.939703643407347e-05,
"loss": 0.0666,
"step": 20000
},
{
"epoch": 0.31,
"learning_rate": 2.9381962344925308e-05,
"loss": 0.0668,
"step": 20500
},
{
"epoch": 0.32,
"learning_rate": 2.9366888255777145e-05,
"loss": 0.0666,
"step": 21000
},
{
"epoch": 0.32,
"learning_rate": 2.935181416662898e-05,
"loss": 0.0664,
"step": 21500
},
{
"epoch": 0.33,
"learning_rate": 2.933674007748082e-05,
"loss": 0.0655,
"step": 22000
},
{
"epoch": 0.34,
"learning_rate": 2.9321665988332654e-05,
"loss": 0.0657,
"step": 22500
},
{
"epoch": 0.35,
"learning_rate": 2.930659189918449e-05,
"loss": 0.0669,
"step": 23000
},
{
"epoch": 0.35,
"learning_rate": 2.929151781003633e-05,
"loss": 0.0667,
"step": 23500
},
{
"epoch": 0.36,
"learning_rate": 2.9276443720888167e-05,
"loss": 0.0661,
"step": 24000
},
{
"epoch": 0.37,
"learning_rate": 2.926136963174e-05,
"loss": 0.0644,
"step": 24500
},
{
"epoch": 0.38,
"learning_rate": 2.924629554259184e-05,
"loss": 0.0635,
"step": 25000
},
{
"epoch": 0.38,
"learning_rate": 2.9231221453443675e-05,
"loss": 0.0629,
"step": 25500
},
{
"epoch": 0.39,
"learning_rate": 2.9216147364295513e-05,
"loss": 0.0673,
"step": 26000
},
{
"epoch": 0.4,
"learning_rate": 2.920107327514735e-05,
"loss": 0.0654,
"step": 26500
},
{
"epoch": 0.41,
"learning_rate": 2.9185999185999188e-05,
"loss": 0.0649,
"step": 27000
},
{
"epoch": 0.41,
"learning_rate": 2.9170925096851022e-05,
"loss": 0.0651,
"step": 27500
},
{
"epoch": 0.42,
"learning_rate": 2.915585100770286e-05,
"loss": 0.063,
"step": 28000
},
{
"epoch": 0.43,
"learning_rate": 2.9140776918554697e-05,
"loss": 0.0646,
"step": 28500
},
{
"epoch": 0.44,
"learning_rate": 2.9125702829406534e-05,
"loss": 0.0632,
"step": 29000
},
{
"epoch": 0.44,
"learning_rate": 2.9110628740258368e-05,
"loss": 0.0648,
"step": 29500
},
{
"epoch": 0.45,
"learning_rate": 2.909555465111021e-05,
"loss": 0.0628,
"step": 30000
},
{
"epoch": 0.46,
"learning_rate": 2.9080480561962043e-05,
"loss": 0.0607,
"step": 30500
},
{
"epoch": 0.47,
"learning_rate": 2.906540647281388e-05,
"loss": 0.0622,
"step": 31000
},
{
"epoch": 0.47,
"learning_rate": 2.9050332383665718e-05,
"loss": 0.0645,
"step": 31500
},
{
"epoch": 0.48,
"learning_rate": 2.9035258294517555e-05,
"loss": 0.0633,
"step": 32000
},
{
"epoch": 0.49,
"learning_rate": 2.902018420536939e-05,
"loss": 0.0617,
"step": 32500
},
{
"epoch": 0.5,
"learning_rate": 2.900511011622123e-05,
"loss": 0.0625,
"step": 33000
},
{
"epoch": 0.5,
"learning_rate": 2.8990036027073064e-05,
"loss": 0.0612,
"step": 33500
},
{
"epoch": 0.51,
"learning_rate": 2.89749619379249e-05,
"loss": 0.0631,
"step": 34000
},
{
"epoch": 0.52,
"learning_rate": 2.8959887848776735e-05,
"loss": 0.0626,
"step": 34500
},
{
"epoch": 0.53,
"learning_rate": 2.8944813759628576e-05,
"loss": 0.0624,
"step": 35000
},
{
"epoch": 0.54,
"learning_rate": 2.892973967048041e-05,
"loss": 0.0602,
"step": 35500
},
{
"epoch": 0.54,
"learning_rate": 2.8914665581332248e-05,
"loss": 0.063,
"step": 36000
},
{
"epoch": 0.55,
"learning_rate": 2.8899591492184085e-05,
"loss": 0.0601,
"step": 36500
},
{
"epoch": 0.56,
"learning_rate": 2.8884517403035923e-05,
"loss": 0.0603,
"step": 37000
},
{
"epoch": 0.57,
"learning_rate": 2.8869443313887757e-05,
"loss": 0.059,
"step": 37500
},
{
"epoch": 0.57,
"learning_rate": 2.8854369224739597e-05,
"loss": 0.0583,
"step": 38000
},
{
"epoch": 0.58,
"learning_rate": 2.883929513559143e-05,
"loss": 0.0618,
"step": 38500
},
{
"epoch": 0.59,
"learning_rate": 2.882422104644327e-05,
"loss": 0.0607,
"step": 39000
},
{
"epoch": 0.6,
"learning_rate": 2.8809146957295106e-05,
"loss": 0.0599,
"step": 39500
},
{
"epoch": 0.6,
"learning_rate": 2.8794072868146944e-05,
"loss": 0.0609,
"step": 40000
},
{
"epoch": 0.61,
"learning_rate": 2.8778998778998778e-05,
"loss": 0.0607,
"step": 40500
},
{
"epoch": 0.62,
"learning_rate": 2.876392468985062e-05,
"loss": 0.0607,
"step": 41000
},
{
"epoch": 0.63,
"learning_rate": 2.8748850600702453e-05,
"loss": 0.0585,
"step": 41500
},
{
"epoch": 0.63,
"learning_rate": 2.873377651155429e-05,
"loss": 0.0598,
"step": 42000
},
{
"epoch": 0.64,
"learning_rate": 2.8718702422406124e-05,
"loss": 0.0596,
"step": 42500
},
{
"epoch": 0.65,
"learning_rate": 2.8703628333257965e-05,
"loss": 0.0585,
"step": 43000
},
{
"epoch": 0.66,
"learning_rate": 2.86885542441098e-05,
"loss": 0.0605,
"step": 43500
},
{
"epoch": 0.66,
"learning_rate": 2.8673480154961636e-05,
"loss": 0.06,
"step": 44000
},
{
"epoch": 0.67,
"learning_rate": 2.8658406065813474e-05,
"loss": 0.0607,
"step": 44500
},
{
"epoch": 0.68,
"learning_rate": 2.864333197666531e-05,
"loss": 0.0599,
"step": 45000
},
{
"epoch": 0.69,
"learning_rate": 2.8628257887517145e-05,
"loss": 0.062,
"step": 45500
},
{
"epoch": 0.69,
"learning_rate": 2.8613183798368986e-05,
"loss": 0.06,
"step": 46000
},
{
"epoch": 0.7,
"learning_rate": 2.859810970922082e-05,
"loss": 0.0594,
"step": 46500
},
{
"epoch": 0.71,
"learning_rate": 2.8583035620072657e-05,
"loss": 0.0589,
"step": 47000
},
{
"epoch": 0.72,
"learning_rate": 2.8567961530924495e-05,
"loss": 0.0578,
"step": 47500
},
{
"epoch": 0.72,
"learning_rate": 2.8552887441776332e-05,
"loss": 0.0586,
"step": 48000
},
{
"epoch": 0.73,
"learning_rate": 2.8537813352628166e-05,
"loss": 0.0568,
"step": 48500
},
{
"epoch": 0.74,
"learning_rate": 2.8522739263480007e-05,
"loss": 0.0579,
"step": 49000
},
{
"epoch": 0.75,
"learning_rate": 2.850766517433184e-05,
"loss": 0.0605,
"step": 49500
},
{
"epoch": 0.75,
"learning_rate": 2.849259108518368e-05,
"loss": 0.0552,
"step": 50000
},
{
"epoch": 0.76,
"learning_rate": 2.8477516996035513e-05,
"loss": 0.0573,
"step": 50500
},
{
"epoch": 0.77,
"learning_rate": 2.8462442906887353e-05,
"loss": 0.0585,
"step": 51000
},
{
"epoch": 0.78,
"learning_rate": 2.8447368817739187e-05,
"loss": 0.0577,
"step": 51500
},
{
"epoch": 0.78,
"learning_rate": 2.8432294728591025e-05,
"loss": 0.0553,
"step": 52000
},
{
"epoch": 0.79,
"learning_rate": 2.8417220639442862e-05,
"loss": 0.0593,
"step": 52500
},
{
"epoch": 0.8,
"learning_rate": 2.84021465502947e-05,
"loss": 0.0592,
"step": 53000
},
{
"epoch": 0.81,
"learning_rate": 2.8387072461146534e-05,
"loss": 0.0558,
"step": 53500
},
{
"epoch": 0.81,
"learning_rate": 2.8371998371998375e-05,
"loss": 0.0548,
"step": 54000
},
{
"epoch": 0.82,
"learning_rate": 2.835692428285021e-05,
"loss": 0.0607,
"step": 54500
},
{
"epoch": 0.83,
"learning_rate": 2.8341850193702046e-05,
"loss": 0.0588,
"step": 55000
},
{
"epoch": 0.84,
"learning_rate": 2.8326776104553883e-05,
"loss": 0.0593,
"step": 55500
},
{
"epoch": 0.84,
"learning_rate": 2.831170201540572e-05,
"loss": 0.0577,
"step": 56000
},
{
"epoch": 0.85,
"learning_rate": 2.8296627926257555e-05,
"loss": 0.0567,
"step": 56500
},
{
"epoch": 0.86,
"learning_rate": 2.8281553837109396e-05,
"loss": 0.0589,
"step": 57000
},
{
"epoch": 0.87,
"learning_rate": 2.826647974796123e-05,
"loss": 0.0566,
"step": 57500
},
{
"epoch": 0.87,
"learning_rate": 2.8251405658813067e-05,
"loss": 0.0551,
"step": 58000
},
{
"epoch": 0.88,
"learning_rate": 2.82363315696649e-05,
"loss": 0.0542,
"step": 58500
},
{
"epoch": 0.89,
"learning_rate": 2.8221257480516742e-05,
"loss": 0.0558,
"step": 59000
},
{
"epoch": 0.9,
"learning_rate": 2.8206183391368576e-05,
"loss": 0.0552,
"step": 59500
},
{
"epoch": 0.9,
"learning_rate": 2.8191109302220413e-05,
"loss": 0.0566,
"step": 60000
},
{
"epoch": 0.91,
"learning_rate": 2.817603521307225e-05,
"loss": 0.0576,
"step": 60500
},
{
"epoch": 0.92,
"learning_rate": 2.816096112392409e-05,
"loss": 0.0594,
"step": 61000
},
{
"epoch": 0.93,
"learning_rate": 2.8145887034775922e-05,
"loss": 0.0578,
"step": 61500
},
{
"epoch": 0.93,
"learning_rate": 2.8130812945627763e-05,
"loss": 0.0561,
"step": 62000
},
{
"epoch": 0.94,
"learning_rate": 2.8115738856479597e-05,
"loss": 0.0586,
"step": 62500
},
{
"epoch": 0.95,
"learning_rate": 2.8100664767331435e-05,
"loss": 0.057,
"step": 63000
},
{
"epoch": 0.96,
"learning_rate": 2.8085590678183272e-05,
"loss": 0.0542,
"step": 63500
},
{
"epoch": 0.96,
"learning_rate": 2.807051658903511e-05,
"loss": 0.0556,
"step": 64000
},
{
"epoch": 0.97,
"learning_rate": 2.8055442499886943e-05,
"loss": 0.0567,
"step": 64500
},
{
"epoch": 0.98,
"learning_rate": 2.804036841073878e-05,
"loss": 0.0557,
"step": 65000
},
{
"epoch": 0.99,
"learning_rate": 2.802529432159062e-05,
"loss": 0.0569,
"step": 65500
},
{
"epoch": 0.99,
"learning_rate": 2.8010220232442456e-05,
"loss": 0.058,
"step": 66000
},
{
"epoch": 1.0,
"eval_accuracy": 0.9778978973178251,
"eval_f1": 0.8965530802052815,
"eval_loss": 0.05169834569096565,
"eval_precision": 0.8674113869146362,
"eval_recall": 0.9277209431029291,
"eval_runtime": 281.2887,
"eval_samples_per_second": 419.228,
"eval_steps_per_second": 26.204,
"step": 66339
},
{
"epoch": 1.0,
"learning_rate": 2.799514614329429e-05,
"loss": 0.0535,
"step": 66500
},
{
"epoch": 1.01,
"learning_rate": 2.798007205414613e-05,
"loss": 0.0473,
"step": 67000
},
{
"epoch": 1.02,
"learning_rate": 2.7964997964997965e-05,
"loss": 0.0484,
"step": 67500
},
{
"epoch": 1.03,
"learning_rate": 2.7949923875849802e-05,
"loss": 0.0485,
"step": 68000
},
{
"epoch": 1.03,
"learning_rate": 2.793484978670164e-05,
"loss": 0.0497,
"step": 68500
},
{
"epoch": 1.04,
"learning_rate": 2.7919775697553477e-05,
"loss": 0.0487,
"step": 69000
},
{
"epoch": 1.05,
"learning_rate": 2.790470160840531e-05,
"loss": 0.0494,
"step": 69500
},
{
"epoch": 1.06,
"learning_rate": 2.7889627519257152e-05,
"loss": 0.0488,
"step": 70000
},
{
"epoch": 1.06,
"learning_rate": 2.7874553430108986e-05,
"loss": 0.0488,
"step": 70500
},
{
"epoch": 1.07,
"learning_rate": 2.7859479340960823e-05,
"loss": 0.0477,
"step": 71000
},
{
"epoch": 1.08,
"learning_rate": 2.784440525181266e-05,
"loss": 0.0478,
"step": 71500
},
{
"epoch": 1.09,
"learning_rate": 2.7829331162664498e-05,
"loss": 0.0488,
"step": 72000
},
{
"epoch": 1.09,
"learning_rate": 2.7814257073516332e-05,
"loss": 0.0477,
"step": 72500
},
{
"epoch": 1.1,
"learning_rate": 2.779918298436817e-05,
"loss": 0.049,
"step": 73000
},
{
"epoch": 1.11,
"learning_rate": 2.7784108895220007e-05,
"loss": 0.0496,
"step": 73500
},
{
"epoch": 1.12,
"learning_rate": 2.7769034806071844e-05,
"loss": 0.0487,
"step": 74000
},
{
"epoch": 1.12,
"learning_rate": 2.775396071692368e-05,
"loss": 0.0501,
"step": 74500
},
{
"epoch": 1.13,
"learning_rate": 2.773888662777552e-05,
"loss": 0.0489,
"step": 75000
},
{
"epoch": 1.14,
"learning_rate": 2.7723812538627353e-05,
"loss": 0.0493,
"step": 75500
},
{
"epoch": 1.15,
"learning_rate": 2.770873844947919e-05,
"loss": 0.0488,
"step": 76000
},
{
"epoch": 1.15,
"learning_rate": 2.7693664360331028e-05,
"loss": 0.047,
"step": 76500
},
{
"epoch": 1.16,
"learning_rate": 2.7678590271182865e-05,
"loss": 0.0473,
"step": 77000
},
{
"epoch": 1.17,
"learning_rate": 2.76635161820347e-05,
"loss": 0.0495,
"step": 77500
},
{
"epoch": 1.18,
"learning_rate": 2.764844209288654e-05,
"loss": 0.0473,
"step": 78000
},
{
"epoch": 1.18,
"learning_rate": 2.7633368003738374e-05,
"loss": 0.0496,
"step": 78500
},
{
"epoch": 1.19,
"learning_rate": 2.7618293914590212e-05,
"loss": 0.0496,
"step": 79000
},
{
"epoch": 1.2,
"learning_rate": 2.760321982544205e-05,
"loss": 0.048,
"step": 79500
},
{
"epoch": 1.21,
"learning_rate": 2.7588145736293887e-05,
"loss": 0.0492,
"step": 80000
},
{
"epoch": 1.21,
"learning_rate": 2.757307164714572e-05,
"loss": 0.0507,
"step": 80500
},
{
"epoch": 1.22,
"learning_rate": 2.7557997557997558e-05,
"loss": 0.047,
"step": 81000
},
{
"epoch": 1.23,
"learning_rate": 2.7542923468849396e-05,
"loss": 0.0485,
"step": 81500
},
{
"epoch": 1.24,
"learning_rate": 2.7527849379701233e-05,
"loss": 0.0487,
"step": 82000
},
{
"epoch": 1.24,
"learning_rate": 2.7512775290553067e-05,
"loss": 0.0469,
"step": 82500
},
{
"epoch": 1.25,
"learning_rate": 2.7497701201404908e-05,
"loss": 0.0494,
"step": 83000
},
{
"epoch": 1.26,
"learning_rate": 2.7482627112256742e-05,
"loss": 0.0456,
"step": 83500
},
{
"epoch": 1.27,
"learning_rate": 2.746755302310858e-05,
"loss": 0.0495,
"step": 84000
},
{
"epoch": 1.27,
"learning_rate": 2.7452478933960417e-05,
"loss": 0.0481,
"step": 84500
},
{
"epoch": 1.28,
"learning_rate": 2.7437404844812254e-05,
"loss": 0.0472,
"step": 85000
},
{
"epoch": 1.29,
"learning_rate": 2.7422330755664088e-05,
"loss": 0.0484,
"step": 85500
},
{
"epoch": 1.3,
"learning_rate": 2.740725666651593e-05,
"loss": 0.0463,
"step": 86000
},
{
"epoch": 1.3,
"learning_rate": 2.7392182577367763e-05,
"loss": 0.0468,
"step": 86500
},
{
"epoch": 1.31,
"learning_rate": 2.73771084882196e-05,
"loss": 0.0465,
"step": 87000
},
{
"epoch": 1.32,
"learning_rate": 2.7362034399071438e-05,
"loss": 0.0473,
"step": 87500
},
{
"epoch": 1.33,
"learning_rate": 2.7346960309923275e-05,
"loss": 0.0472,
"step": 88000
},
{
"epoch": 1.33,
"learning_rate": 2.733188622077511e-05,
"loss": 0.048,
"step": 88500
},
{
"epoch": 1.34,
"learning_rate": 2.7316812131626947e-05,
"loss": 0.0484,
"step": 89000
},
{
"epoch": 1.35,
"learning_rate": 2.7301738042478784e-05,
"loss": 0.0494,
"step": 89500
},
{
"epoch": 1.36,
"learning_rate": 2.728666395333062e-05,
"loss": 0.0466,
"step": 90000
},
{
"epoch": 1.36,
"learning_rate": 2.7271589864182456e-05,
"loss": 0.0487,
"step": 90500
},
{
"epoch": 1.37,
"learning_rate": 2.7256515775034296e-05,
"loss": 0.0473,
"step": 91000
},
{
"epoch": 1.38,
"learning_rate": 2.724144168588613e-05,
"loss": 0.0487,
"step": 91500
},
{
"epoch": 1.39,
"learning_rate": 2.7226367596737968e-05,
"loss": 0.0466,
"step": 92000
},
{
"epoch": 1.39,
"learning_rate": 2.7211293507589805e-05,
"loss": 0.0478,
"step": 92500
},
{
"epoch": 1.4,
"learning_rate": 2.7196219418441643e-05,
"loss": 0.0484,
"step": 93000
},
{
"epoch": 1.41,
"learning_rate": 2.7181145329293477e-05,
"loss": 0.0444,
"step": 93500
},
{
"epoch": 1.42,
"learning_rate": 2.7166071240145317e-05,
"loss": 0.046,
"step": 94000
},
{
"epoch": 1.42,
"learning_rate": 2.715099715099715e-05,
"loss": 0.047,
"step": 94500
},
{
"epoch": 1.43,
"learning_rate": 2.713592306184899e-05,
"loss": 0.0462,
"step": 95000
},
{
"epoch": 1.44,
"learning_rate": 2.7120848972700823e-05,
"loss": 0.0466,
"step": 95500
},
{
"epoch": 1.45,
"learning_rate": 2.7105774883552664e-05,
"loss": 0.0483,
"step": 96000
},
{
"epoch": 1.45,
"learning_rate": 2.7090700794404498e-05,
"loss": 0.046,
"step": 96500
},
{
"epoch": 1.46,
"learning_rate": 2.7075626705256335e-05,
"loss": 0.0468,
"step": 97000
},
{
"epoch": 1.47,
"learning_rate": 2.7060552616108173e-05,
"loss": 0.0459,
"step": 97500
},
{
"epoch": 1.48,
"learning_rate": 2.704547852696001e-05,
"loss": 0.0487,
"step": 98000
},
{
"epoch": 1.48,
"learning_rate": 2.7030404437811844e-05,
"loss": 0.0473,
"step": 98500
},
{
"epoch": 1.49,
"learning_rate": 2.7015330348663685e-05,
"loss": 0.0453,
"step": 99000
},
{
"epoch": 1.5,
"learning_rate": 2.700025625951552e-05,
"loss": 0.0461,
"step": 99500
},
{
"epoch": 1.51,
"learning_rate": 2.6985182170367356e-05,
"loss": 0.0467,
"step": 100000
},
{
"epoch": 1.51,
"learning_rate": 2.6970108081219194e-05,
"loss": 0.0475,
"step": 100500
},
{
"epoch": 1.52,
"learning_rate": 2.695503399207103e-05,
"loss": 0.0486,
"step": 101000
},
{
"epoch": 1.53,
"learning_rate": 2.6939959902922865e-05,
"loss": 0.0476,
"step": 101500
},
{
"epoch": 1.54,
"learning_rate": 2.6924885813774706e-05,
"loss": 0.0481,
"step": 102000
},
{
"epoch": 1.55,
"learning_rate": 2.690981172462654e-05,
"loss": 0.0458,
"step": 102500
},
{
"epoch": 1.55,
"learning_rate": 2.6894737635478378e-05,
"loss": 0.0463,
"step": 103000
},
{
"epoch": 1.56,
"learning_rate": 2.687966354633021e-05,
"loss": 0.0468,
"step": 103500
},
{
"epoch": 1.57,
"learning_rate": 2.6864589457182052e-05,
"loss": 0.0476,
"step": 104000
},
{
"epoch": 1.58,
"learning_rate": 2.6849515368033886e-05,
"loss": 0.0459,
"step": 104500
},
{
"epoch": 1.58,
"learning_rate": 2.6834441278885724e-05,
"loss": 0.0455,
"step": 105000
},
{
"epoch": 1.59,
"learning_rate": 2.681936718973756e-05,
"loss": 0.0465,
"step": 105500
},
{
"epoch": 1.6,
"learning_rate": 2.68042931005894e-05,
"loss": 0.0454,
"step": 106000
},
{
"epoch": 1.61,
"learning_rate": 2.6789219011441233e-05,
"loss": 0.0482,
"step": 106500
},
{
"epoch": 1.61,
"learning_rate": 2.6774144922293074e-05,
"loss": 0.0467,
"step": 107000
},
{
"epoch": 1.62,
"learning_rate": 2.6759070833144908e-05,
"loss": 0.0467,
"step": 107500
},
{
"epoch": 1.63,
"learning_rate": 2.6743996743996745e-05,
"loss": 0.0486,
"step": 108000
},
{
"epoch": 1.64,
"learning_rate": 2.6728922654848582e-05,
"loss": 0.0465,
"step": 108500
},
{
"epoch": 1.64,
"learning_rate": 2.671384856570042e-05,
"loss": 0.0476,
"step": 109000
},
{
"epoch": 1.65,
"learning_rate": 2.6698774476552254e-05,
"loss": 0.0456,
"step": 109500
},
{
"epoch": 1.66,
"learning_rate": 2.6683700387404095e-05,
"loss": 0.0459,
"step": 110000
},
{
"epoch": 1.67,
"learning_rate": 2.666862629825593e-05,
"loss": 0.0478,
"step": 110500
},
{
"epoch": 1.67,
"learning_rate": 2.6653552209107766e-05,
"loss": 0.0459,
"step": 111000
},
{
"epoch": 1.68,
"learning_rate": 2.66384781199596e-05,
"loss": 0.0471,
"step": 111500
},
{
"epoch": 1.69,
"learning_rate": 2.662340403081144e-05,
"loss": 0.0459,
"step": 112000
},
{
"epoch": 1.7,
"learning_rate": 2.6608329941663275e-05,
"loss": 0.0479,
"step": 112500
},
{
"epoch": 1.7,
"learning_rate": 2.6593255852515112e-05,
"loss": 0.0459,
"step": 113000
},
{
"epoch": 1.71,
"learning_rate": 2.657818176336695e-05,
"loss": 0.0455,
"step": 113500
},
{
"epoch": 1.72,
"learning_rate": 2.6563107674218787e-05,
"loss": 0.0487,
"step": 114000
},
{
"epoch": 1.73,
"learning_rate": 2.654803358507062e-05,
"loss": 0.0473,
"step": 114500
},
{
"epoch": 1.73,
"learning_rate": 2.6532959495922462e-05,
"loss": 0.0461,
"step": 115000
},
{
"epoch": 1.74,
"learning_rate": 2.6517885406774296e-05,
"loss": 0.0455,
"step": 115500
},
{
"epoch": 1.75,
"learning_rate": 2.6502811317626134e-05,
"loss": 0.0451,
"step": 116000
},
{
"epoch": 1.76,
"learning_rate": 2.648773722847797e-05,
"loss": 0.046,
"step": 116500
},
{
"epoch": 1.76,
"learning_rate": 2.647266313932981e-05,
"loss": 0.047,
"step": 117000
},
{
"epoch": 1.77,
"learning_rate": 2.6457589050181642e-05,
"loss": 0.0461,
"step": 117500
},
{
"epoch": 1.78,
"learning_rate": 2.644251496103348e-05,
"loss": 0.0467,
"step": 118000
},
{
"epoch": 1.79,
"learning_rate": 2.6427440871885317e-05,
"loss": 0.0473,
"step": 118500
},
{
"epoch": 1.79,
"learning_rate": 2.6412366782737155e-05,
"loss": 0.044,
"step": 119000
},
{
"epoch": 1.8,
"learning_rate": 2.639729269358899e-05,
"loss": 0.0474,
"step": 119500
},
{
"epoch": 1.81,
"learning_rate": 2.638221860444083e-05,
"loss": 0.0465,
"step": 120000
},
{
"epoch": 1.82,
"learning_rate": 2.6367144515292664e-05,
"loss": 0.048,
"step": 120500
},
{
"epoch": 1.82,
"learning_rate": 2.63520704261445e-05,
"loss": 0.0462,
"step": 121000
},
{
"epoch": 1.83,
"learning_rate": 2.633699633699634e-05,
"loss": 0.0466,
"step": 121500
},
{
"epoch": 1.84,
"learning_rate": 2.6321922247848176e-05,
"loss": 0.0459,
"step": 122000
},
{
"epoch": 1.85,
"learning_rate": 2.630684815870001e-05,
"loss": 0.0462,
"step": 122500
},
{
"epoch": 1.85,
"learning_rate": 2.629177406955185e-05,
"loss": 0.0444,
"step": 123000
},
{
"epoch": 1.86,
"learning_rate": 2.6276699980403685e-05,
"loss": 0.0458,
"step": 123500
},
{
"epoch": 1.87,
"learning_rate": 2.6261625891255522e-05,
"loss": 0.0448,
"step": 124000
},
{
"epoch": 1.88,
"learning_rate": 2.624655180210736e-05,
"loss": 0.0452,
"step": 124500
},
{
"epoch": 1.88,
"learning_rate": 2.6231477712959197e-05,
"loss": 0.0473,
"step": 125000
},
{
"epoch": 1.89,
"learning_rate": 2.621640362381103e-05,
"loss": 0.0446,
"step": 125500
},
{
"epoch": 1.9,
"learning_rate": 2.620132953466287e-05,
"loss": 0.0442,
"step": 126000
},
{
"epoch": 1.91,
"learning_rate": 2.6186255445514706e-05,
"loss": 0.0464,
"step": 126500
},
{
"epoch": 1.91,
"learning_rate": 2.6171181356366543e-05,
"loss": 0.0454,
"step": 127000
},
{
"epoch": 1.92,
"learning_rate": 2.6156107267218377e-05,
"loss": 0.046,
"step": 127500
},
{
"epoch": 1.93,
"learning_rate": 2.6141033178070218e-05,
"loss": 0.0441,
"step": 128000
},
{
"epoch": 1.94,
"learning_rate": 2.6125959088922052e-05,
"loss": 0.0451,
"step": 128500
},
{
"epoch": 1.94,
"learning_rate": 2.611088499977389e-05,
"loss": 0.0445,
"step": 129000
},
{
"epoch": 1.95,
"learning_rate": 2.6095810910625727e-05,
"loss": 0.0447,
"step": 129500
},
{
"epoch": 1.96,
"learning_rate": 2.6080736821477564e-05,
"loss": 0.046,
"step": 130000
},
{
"epoch": 1.97,
"learning_rate": 2.60656627323294e-05,
"loss": 0.041,
"step": 130500
},
{
"epoch": 1.97,
"learning_rate": 2.605058864318124e-05,
"loss": 0.0469,
"step": 131000
},
{
"epoch": 1.98,
"learning_rate": 2.6035514554033073e-05,
"loss": 0.0443,
"step": 131500
},
{
"epoch": 1.99,
"learning_rate": 2.602044046488491e-05,
"loss": 0.0435,
"step": 132000
},
{
"epoch": 2.0,
"learning_rate": 2.6005366375736748e-05,
"loss": 0.0461,
"step": 132500
},
{
"epoch": 2.0,
"eval_accuracy": 0.9813823655715507,
"eval_f1": 0.9126315393792152,
"eval_loss": 0.04644881188869476,
"eval_precision": 0.8880811658933832,
"eval_recall": 0.9385778580787981,
"eval_runtime": 245.1843,
"eval_samples_per_second": 480.961,
"eval_steps_per_second": 30.063,
"step": 132678
},
{
"epoch": 2.0,
"learning_rate": 2.5990292286588586e-05,
"loss": 0.0385,
"step": 133000
},
{
"epoch": 2.01,
"learning_rate": 2.597521819744042e-05,
"loss": 0.036,
"step": 133500
},
{
"epoch": 2.02,
"learning_rate": 2.5960144108292257e-05,
"loss": 0.037,
"step": 134000
},
{
"epoch": 2.03,
"learning_rate": 2.5945070019144094e-05,
"loss": 0.035,
"step": 134500
},
{
"epoch": 2.04,
"learning_rate": 2.5929995929995932e-05,
"loss": 0.0391,
"step": 135000
},
{
"epoch": 2.04,
"learning_rate": 2.5914921840847766e-05,
"loss": 0.0349,
"step": 135500
},
{
"epoch": 2.05,
"learning_rate": 2.5899847751699607e-05,
"loss": 0.0383,
"step": 136000
},
{
"epoch": 2.06,
"learning_rate": 2.588477366255144e-05,
"loss": 0.038,
"step": 136500
},
{
"epoch": 2.07,
"learning_rate": 2.5869699573403278e-05,
"loss": 0.038,
"step": 137000
},
{
"epoch": 2.07,
"learning_rate": 2.5854625484255116e-05,
"loss": 0.0373,
"step": 137500
},
{
"epoch": 2.08,
"learning_rate": 2.5839551395106953e-05,
"loss": 0.0367,
"step": 138000
},
{
"epoch": 2.09,
"learning_rate": 2.5824477305958787e-05,
"loss": 0.0375,
"step": 138500
},
{
"epoch": 2.1,
"learning_rate": 2.5809403216810628e-05,
"loss": 0.0363,
"step": 139000
},
{
"epoch": 2.1,
"learning_rate": 2.5794329127662462e-05,
"loss": 0.0384,
"step": 139500
},
{
"epoch": 2.11,
"learning_rate": 2.57792550385143e-05,
"loss": 0.0374,
"step": 140000
},
{
"epoch": 2.12,
"learning_rate": 2.5764180949366137e-05,
"loss": 0.0372,
"step": 140500
},
{
"epoch": 2.13,
"learning_rate": 2.5749106860217974e-05,
"loss": 0.0386,
"step": 141000
},
{
"epoch": 2.13,
"learning_rate": 2.5734032771069808e-05,
"loss": 0.0375,
"step": 141500
},
{
"epoch": 2.14,
"learning_rate": 2.5718958681921646e-05,
"loss": 0.0386,
"step": 142000
},
{
"epoch": 2.15,
"learning_rate": 2.5703884592773483e-05,
"loss": 0.0383,
"step": 142500
},
{
"epoch": 2.16,
"learning_rate": 2.568881050362532e-05,
"loss": 0.0369,
"step": 143000
},
{
"epoch": 2.16,
"learning_rate": 2.5673736414477155e-05,
"loss": 0.0381,
"step": 143500
},
{
"epoch": 2.17,
"learning_rate": 2.5658662325328995e-05,
"loss": 0.036,
"step": 144000
},
{
"epoch": 2.18,
"learning_rate": 2.564358823618083e-05,
"loss": 0.0378,
"step": 144500
},
{
"epoch": 2.19,
"learning_rate": 2.5628514147032667e-05,
"loss": 0.0376,
"step": 145000
},
{
"epoch": 2.19,
"learning_rate": 2.5613440057884504e-05,
"loss": 0.0373,
"step": 145500
},
{
"epoch": 2.2,
"learning_rate": 2.559836596873634e-05,
"loss": 0.0381,
"step": 146000
},
{
"epoch": 2.21,
"learning_rate": 2.5583291879588176e-05,
"loss": 0.0372,
"step": 146500
},
{
"epoch": 2.22,
"learning_rate": 2.5568217790440016e-05,
"loss": 0.0369,
"step": 147000
},
{
"epoch": 2.22,
"learning_rate": 2.555314370129185e-05,
"loss": 0.0386,
"step": 147500
},
{
"epoch": 2.23,
"learning_rate": 2.5538069612143688e-05,
"loss": 0.037,
"step": 148000
},
{
"epoch": 2.24,
"learning_rate": 2.5522995522995522e-05,
"loss": 0.0398,
"step": 148500
},
{
"epoch": 2.25,
"learning_rate": 2.5507921433847363e-05,
"loss": 0.0368,
"step": 149000
},
{
"epoch": 2.25,
"learning_rate": 2.5492847344699197e-05,
"loss": 0.0368,
"step": 149500
},
{
"epoch": 2.26,
"learning_rate": 2.547777325555103e-05,
"loss": 0.0382,
"step": 150000
},
{
"epoch": 2.27,
"learning_rate": 2.546269916640287e-05,
"loss": 0.04,
"step": 150500
},
{
"epoch": 2.28,
"learning_rate": 2.5447625077254706e-05,
"loss": 0.0373,
"step": 151000
},
{
"epoch": 2.28,
"learning_rate": 2.5432550988106543e-05,
"loss": 0.0378,
"step": 151500
},
{
"epoch": 2.29,
"learning_rate": 2.541747689895838e-05,
"loss": 0.0395,
"step": 152000
},
{
"epoch": 2.3,
"learning_rate": 2.5402402809810218e-05,
"loss": 0.0377,
"step": 152500
},
{
"epoch": 2.31,
"learning_rate": 2.5387328720662052e-05,
"loss": 0.0383,
"step": 153000
},
{
"epoch": 2.31,
"learning_rate": 2.5372254631513893e-05,
"loss": 0.0374,
"step": 153500
},
{
"epoch": 2.32,
"learning_rate": 2.5357180542365727e-05,
"loss": 0.0393,
"step": 154000
},
{
"epoch": 2.33,
"learning_rate": 2.5342106453217564e-05,
"loss": 0.0377,
"step": 154500
},
{
"epoch": 2.34,
"learning_rate": 2.53270323640694e-05,
"loss": 0.0376,
"step": 155000
},
{
"epoch": 2.34,
"learning_rate": 2.531195827492124e-05,
"loss": 0.0388,
"step": 155500
},
{
"epoch": 2.35,
"learning_rate": 2.5296884185773073e-05,
"loss": 0.0369,
"step": 156000
},
{
"epoch": 2.36,
"learning_rate": 2.528181009662491e-05,
"loss": 0.038,
"step": 156500
},
{
"epoch": 2.37,
"learning_rate": 2.5266736007476748e-05,
"loss": 0.0386,
"step": 157000
},
{
"epoch": 2.37,
"learning_rate": 2.5251661918328585e-05,
"loss": 0.0389,
"step": 157500
},
{
"epoch": 2.38,
"learning_rate": 2.523658782918042e-05,
"loss": 0.0374,
"step": 158000
},
{
"epoch": 2.39,
"learning_rate": 2.522151374003226e-05,
"loss": 0.0377,
"step": 158500
},
{
"epoch": 2.4,
"learning_rate": 2.5206439650884094e-05,
"loss": 0.039,
"step": 159000
},
{
"epoch": 2.4,
"learning_rate": 2.519136556173593e-05,
"loss": 0.0392,
"step": 159500
},
{
"epoch": 2.41,
"learning_rate": 2.517629147258777e-05,
"loss": 0.038,
"step": 160000
},
{
"epoch": 2.42,
"learning_rate": 2.5161217383439607e-05,
"loss": 0.0383,
"step": 160500
},
{
"epoch": 2.43,
"learning_rate": 2.514614329429144e-05,
"loss": 0.0364,
"step": 161000
},
{
"epoch": 2.43,
"learning_rate": 2.513106920514328e-05,
"loss": 0.0366,
"step": 161500
},
{
"epoch": 2.44,
"learning_rate": 2.5115995115995115e-05,
"loss": 0.0387,
"step": 162000
},
{
"epoch": 2.45,
"learning_rate": 2.5100921026846953e-05,
"loss": 0.0369,
"step": 162500
},
{
"epoch": 2.46,
"learning_rate": 2.508584693769879e-05,
"loss": 0.0384,
"step": 163000
},
{
"epoch": 2.46,
"learning_rate": 2.5070772848550628e-05,
"loss": 0.0386,
"step": 163500
},
{
"epoch": 2.47,
"learning_rate": 2.5055698759402462e-05,
"loss": 0.0377,
"step": 164000
},
{
"epoch": 2.48,
"learning_rate": 2.50406246702543e-05,
"loss": 0.0366,
"step": 164500
},
{
"epoch": 2.49,
"learning_rate": 2.5025550581106137e-05,
"loss": 0.0379,
"step": 165000
},
{
"epoch": 2.49,
"learning_rate": 2.5010476491957974e-05,
"loss": 0.0381,
"step": 165500
},
{
"epoch": 2.5,
"learning_rate": 2.4995402402809808e-05,
"loss": 0.0377,
"step": 166000
},
{
"epoch": 2.51,
"learning_rate": 2.498032831366165e-05,
"loss": 0.0399,
"step": 166500
},
{
"epoch": 2.52,
"learning_rate": 2.4965254224513483e-05,
"loss": 0.0388,
"step": 167000
},
{
"epoch": 2.52,
"learning_rate": 2.495018013536532e-05,
"loss": 0.0391,
"step": 167500
},
{
"epoch": 2.53,
"learning_rate": 2.4935106046217158e-05,
"loss": 0.0379,
"step": 168000
},
{
"epoch": 2.54,
"learning_rate": 2.4920031957068995e-05,
"loss": 0.0381,
"step": 168500
},
{
"epoch": 2.55,
"learning_rate": 2.490495786792083e-05,
"loss": 0.0378,
"step": 169000
},
{
"epoch": 2.56,
"learning_rate": 2.488988377877267e-05,
"loss": 0.039,
"step": 169500
},
{
"epoch": 2.56,
"learning_rate": 2.4874809689624504e-05,
"loss": 0.0386,
"step": 170000
},
{
"epoch": 2.57,
"learning_rate": 2.485973560047634e-05,
"loss": 0.0387,
"step": 170500
},
{
"epoch": 2.58,
"learning_rate": 2.484466151132818e-05,
"loss": 0.0372,
"step": 171000
},
{
"epoch": 2.59,
"learning_rate": 2.4829587422180016e-05,
"loss": 0.0387,
"step": 171500
},
{
"epoch": 2.59,
"learning_rate": 2.481451333303185e-05,
"loss": 0.0367,
"step": 172000
},
{
"epoch": 2.6,
"learning_rate": 2.4799439243883688e-05,
"loss": 0.0383,
"step": 172500
},
{
"epoch": 2.61,
"learning_rate": 2.4784365154735525e-05,
"loss": 0.037,
"step": 173000
},
{
"epoch": 2.62,
"learning_rate": 2.4769291065587363e-05,
"loss": 0.0366,
"step": 173500
},
{
"epoch": 2.62,
"learning_rate": 2.4754216976439197e-05,
"loss": 0.0366,
"step": 174000
},
{
"epoch": 2.63,
"learning_rate": 2.4739142887291037e-05,
"loss": 0.0386,
"step": 174500
},
{
"epoch": 2.64,
"learning_rate": 2.472406879814287e-05,
"loss": 0.038,
"step": 175000
},
{
"epoch": 2.65,
"learning_rate": 2.470899470899471e-05,
"loss": 0.0382,
"step": 175500
},
{
"epoch": 2.65,
"learning_rate": 2.4693920619846546e-05,
"loss": 0.0405,
"step": 176000
},
{
"epoch": 2.66,
"learning_rate": 2.4678846530698384e-05,
"loss": 0.0364,
"step": 176500
},
{
"epoch": 2.67,
"learning_rate": 2.4663772441550218e-05,
"loss": 0.0371,
"step": 177000
},
{
"epoch": 2.68,
"learning_rate": 2.464869835240206e-05,
"loss": 0.037,
"step": 177500
},
{
"epoch": 2.68,
"learning_rate": 2.4633624263253893e-05,
"loss": 0.0372,
"step": 178000
},
{
"epoch": 2.69,
"learning_rate": 2.461855017410573e-05,
"loss": 0.0377,
"step": 178500
},
{
"epoch": 2.7,
"learning_rate": 2.4603476084957567e-05,
"loss": 0.0363,
"step": 179000
},
{
"epoch": 2.71,
"learning_rate": 2.4588401995809405e-05,
"loss": 0.0401,
"step": 179500
},
{
"epoch": 2.71,
"learning_rate": 2.457332790666124e-05,
"loss": 0.0373,
"step": 180000
},
{
"epoch": 2.72,
"learning_rate": 2.4558253817513076e-05,
"loss": 0.0376,
"step": 180500
},
{
"epoch": 2.73,
"learning_rate": 2.4543179728364914e-05,
"loss": 0.0371,
"step": 181000
},
{
"epoch": 2.74,
"learning_rate": 2.452810563921675e-05,
"loss": 0.0371,
"step": 181500
},
{
"epoch": 2.74,
"learning_rate": 2.4513031550068585e-05,
"loss": 0.0377,
"step": 182000
},
{
"epoch": 2.75,
"learning_rate": 2.4497957460920426e-05,
"loss": 0.0387,
"step": 182500
},
{
"epoch": 2.76,
"learning_rate": 2.448288337177226e-05,
"loss": 0.0389,
"step": 183000
},
{
"epoch": 2.77,
"learning_rate": 2.4467809282624097e-05,
"loss": 0.0379,
"step": 183500
},
{
"epoch": 2.77,
"learning_rate": 2.4452735193475935e-05,
"loss": 0.037,
"step": 184000
},
{
"epoch": 2.78,
"learning_rate": 2.4437661104327772e-05,
"loss": 0.037,
"step": 184500
},
{
"epoch": 2.79,
"learning_rate": 2.4422587015179606e-05,
"loss": 0.0383,
"step": 185000
},
{
"epoch": 2.8,
"learning_rate": 2.4407512926031447e-05,
"loss": 0.0374,
"step": 185500
},
{
"epoch": 2.8,
"learning_rate": 2.439243883688328e-05,
"loss": 0.0376,
"step": 186000
},
{
"epoch": 2.81,
"learning_rate": 2.437736474773512e-05,
"loss": 0.0378,
"step": 186500
},
{
"epoch": 2.82,
"learning_rate": 2.4362290658586953e-05,
"loss": 0.037,
"step": 187000
},
{
"epoch": 2.83,
"learning_rate": 2.4347216569438793e-05,
"loss": 0.0364,
"step": 187500
},
{
"epoch": 2.83,
"learning_rate": 2.4332142480290627e-05,
"loss": 0.0382,
"step": 188000
},
{
"epoch": 2.84,
"learning_rate": 2.4317068391142465e-05,
"loss": 0.0372,
"step": 188500
},
{
"epoch": 2.85,
"learning_rate": 2.4301994301994302e-05,
"loss": 0.0381,
"step": 189000
},
{
"epoch": 2.86,
"learning_rate": 2.428692021284614e-05,
"loss": 0.0387,
"step": 189500
},
{
"epoch": 2.86,
"learning_rate": 2.4271846123697974e-05,
"loss": 0.0384,
"step": 190000
},
{
"epoch": 2.87,
"learning_rate": 2.4256772034549815e-05,
"loss": 0.0381,
"step": 190500
},
{
"epoch": 2.88,
"learning_rate": 2.424169794540165e-05,
"loss": 0.0376,
"step": 191000
},
{
"epoch": 2.89,
"learning_rate": 2.4226623856253486e-05,
"loss": 0.0383,
"step": 191500
},
{
"epoch": 2.89,
"learning_rate": 2.4211549767105323e-05,
"loss": 0.0381,
"step": 192000
},
{
"epoch": 2.9,
"learning_rate": 2.419647567795716e-05,
"loss": 0.0366,
"step": 192500
},
{
"epoch": 2.91,
"learning_rate": 2.4181401588808995e-05,
"loss": 0.0391,
"step": 193000
},
{
"epoch": 2.92,
"learning_rate": 2.4166327499660836e-05,
"loss": 0.038,
"step": 193500
},
{
"epoch": 2.92,
"learning_rate": 2.415125341051267e-05,
"loss": 0.0384,
"step": 194000
},
{
"epoch": 2.93,
"learning_rate": 2.4136179321364507e-05,
"loss": 0.0384,
"step": 194500
},
{
"epoch": 2.94,
"learning_rate": 2.412110523221634e-05,
"loss": 0.0375,
"step": 195000
},
{
"epoch": 2.95,
"learning_rate": 2.4106031143068182e-05,
"loss": 0.038,
"step": 195500
},
{
"epoch": 2.95,
"learning_rate": 2.4090957053920016e-05,
"loss": 0.0349,
"step": 196000
},
{
"epoch": 2.96,
"learning_rate": 2.4075882964771853e-05,
"loss": 0.0368,
"step": 196500
},
{
"epoch": 2.97,
"learning_rate": 2.406080887562369e-05,
"loss": 0.0393,
"step": 197000
},
{
"epoch": 2.98,
"learning_rate": 2.404573478647553e-05,
"loss": 0.0367,
"step": 197500
},
{
"epoch": 2.98,
"learning_rate": 2.4030660697327362e-05,
"loss": 0.0381,
"step": 198000
},
{
"epoch": 2.99,
"learning_rate": 2.4015586608179203e-05,
"loss": 0.0375,
"step": 198500
},
{
"epoch": 3.0,
"learning_rate": 2.4000512519031037e-05,
"loss": 0.0375,
"step": 199000
},
{
"epoch": 3.0,
"eval_accuracy": 0.9830589596281374,
"eval_f1": 0.9202532892026535,
"eval_loss": 0.04450139403343201,
"eval_precision": 0.9090566202540189,
"eval_recall": 0.9317292119377233,
"eval_runtime": 241.3574,
"eval_samples_per_second": 488.587,
"eval_steps_per_second": 30.54,
"step": 199017
},
{
"epoch": 3.01,
"learning_rate": 2.3985438429882875e-05,
"loss": 0.0305,
"step": 199500
},
{
"epoch": 3.01,
"learning_rate": 2.3970364340734712e-05,
"loss": 0.0303,
"step": 200000
},
{
"epoch": 3.02,
"learning_rate": 2.395529025158655e-05,
"loss": 0.0309,
"step": 200500
},
{
"epoch": 3.03,
"learning_rate": 2.3940216162438383e-05,
"loss": 0.0296,
"step": 201000
},
{
"epoch": 3.04,
"learning_rate": 2.3925142073290224e-05,
"loss": 0.0315,
"step": 201500
},
{
"epoch": 3.04,
"learning_rate": 2.391006798414206e-05,
"loss": 0.0295,
"step": 202000
},
{
"epoch": 3.05,
"learning_rate": 2.3894993894993896e-05,
"loss": 0.0308,
"step": 202500
},
{
"epoch": 3.06,
"learning_rate": 2.387991980584573e-05,
"loss": 0.0309,
"step": 203000
},
{
"epoch": 3.07,
"learning_rate": 2.386484571669757e-05,
"loss": 0.031,
"step": 203500
},
{
"epoch": 3.08,
"learning_rate": 2.3849771627549405e-05,
"loss": 0.0312,
"step": 204000
},
{
"epoch": 3.08,
"learning_rate": 2.3834697538401242e-05,
"loss": 0.0304,
"step": 204500
},
{
"epoch": 3.09,
"learning_rate": 2.381962344925308e-05,
"loss": 0.03,
"step": 205000
},
{
"epoch": 3.1,
"learning_rate": 2.3804549360104917e-05,
"loss": 0.0301,
"step": 205500
},
{
"epoch": 3.11,
"learning_rate": 2.378947527095675e-05,
"loss": 0.0301,
"step": 206000
},
{
"epoch": 3.11,
"learning_rate": 2.3774401181808592e-05,
"loss": 0.0293,
"step": 206500
},
{
"epoch": 3.12,
"learning_rate": 2.3759327092660426e-05,
"loss": 0.0306,
"step": 207000
},
{
"epoch": 3.13,
"learning_rate": 2.3744253003512263e-05,
"loss": 0.0296,
"step": 207500
},
{
"epoch": 3.14,
"learning_rate": 2.37291789143641e-05,
"loss": 0.0302,
"step": 208000
},
{
"epoch": 3.14,
"learning_rate": 2.3714104825215938e-05,
"loss": 0.0303,
"step": 208500
},
{
"epoch": 3.15,
"learning_rate": 2.3699030736067772e-05,
"loss": 0.0317,
"step": 209000
},
{
"epoch": 3.16,
"learning_rate": 2.368395664691961e-05,
"loss": 0.0318,
"step": 209500
},
{
"epoch": 3.17,
"learning_rate": 2.3668882557771447e-05,
"loss": 0.0314,
"step": 210000
},
{
"epoch": 3.17,
"learning_rate": 2.3653808468623284e-05,
"loss": 0.0286,
"step": 210500
},
{
"epoch": 3.18,
"learning_rate": 2.363873437947512e-05,
"loss": 0.0306,
"step": 211000
},
{
"epoch": 3.19,
"learning_rate": 2.362366029032696e-05,
"loss": 0.0321,
"step": 211500
},
{
"epoch": 3.2,
"learning_rate": 2.3608586201178793e-05,
"loss": 0.031,
"step": 212000
},
{
"epoch": 3.2,
"learning_rate": 2.359351211203063e-05,
"loss": 0.0282,
"step": 212500
},
{
"epoch": 3.21,
"learning_rate": 2.3578438022882468e-05,
"loss": 0.0312,
"step": 213000
},
{
"epoch": 3.22,
"learning_rate": 2.3563363933734305e-05,
"loss": 0.0311,
"step": 213500
},
{
"epoch": 3.23,
"learning_rate": 2.354828984458614e-05,
"loss": 0.0309,
"step": 214000
},
{
"epoch": 3.23,
"learning_rate": 2.353321575543798e-05,
"loss": 0.0315,
"step": 214500
},
{
"epoch": 3.24,
"learning_rate": 2.3518141666289814e-05,
"loss": 0.0304,
"step": 215000
},
{
"epoch": 3.25,
"learning_rate": 2.3503067577141652e-05,
"loss": 0.0309,
"step": 215500
},
{
"epoch": 3.26,
"learning_rate": 2.348799348799349e-05,
"loss": 0.0308,
"step": 216000
},
{
"epoch": 3.26,
"learning_rate": 2.3472919398845327e-05,
"loss": 0.0304,
"step": 216500
},
{
"epoch": 3.27,
"learning_rate": 2.345784530969716e-05,
"loss": 0.0303,
"step": 217000
},
{
"epoch": 3.28,
"learning_rate": 2.3442771220548998e-05,
"loss": 0.0302,
"step": 217500
},
{
"epoch": 3.29,
"learning_rate": 2.3427697131400836e-05,
"loss": 0.0298,
"step": 218000
},
{
"epoch": 3.29,
"learning_rate": 2.3412623042252673e-05,
"loss": 0.0297,
"step": 218500
},
{
"epoch": 3.3,
"learning_rate": 2.3397548953104507e-05,
"loss": 0.0318,
"step": 219000
},
{
"epoch": 3.31,
"learning_rate": 2.3382474863956348e-05,
"loss": 0.0321,
"step": 219500
},
{
"epoch": 3.32,
"learning_rate": 2.3367400774808182e-05,
"loss": 0.0322,
"step": 220000
},
{
"epoch": 3.32,
"learning_rate": 2.335232668566002e-05,
"loss": 0.0321,
"step": 220500
},
{
"epoch": 3.33,
"learning_rate": 2.3337252596511857e-05,
"loss": 0.0291,
"step": 221000
},
{
"epoch": 3.34,
"learning_rate": 2.3322178507363694e-05,
"loss": 0.0319,
"step": 221500
},
{
"epoch": 3.35,
"learning_rate": 2.3307104418215528e-05,
"loss": 0.0306,
"step": 222000
},
{
"epoch": 3.35,
"learning_rate": 2.329203032906737e-05,
"loss": 0.0313,
"step": 222500
},
{
"epoch": 3.36,
"learning_rate": 2.3276956239919203e-05,
"loss": 0.0313,
"step": 223000
},
{
"epoch": 3.37,
"learning_rate": 2.326188215077104e-05,
"loss": 0.0297,
"step": 223500
},
{
"epoch": 3.38,
"learning_rate": 2.3246808061622878e-05,
"loss": 0.0302,
"step": 224000
},
{
"epoch": 3.38,
"learning_rate": 2.3231733972474715e-05,
"loss": 0.0321,
"step": 224500
},
{
"epoch": 3.39,
"learning_rate": 2.321665988332655e-05,
"loss": 0.0326,
"step": 225000
},
{
"epoch": 3.4,
"learning_rate": 2.3201585794178387e-05,
"loss": 0.0294,
"step": 225500
},
{
"epoch": 3.41,
"learning_rate": 2.3186511705030224e-05,
"loss": 0.0306,
"step": 226000
},
{
"epoch": 3.41,
"learning_rate": 2.317143761588206e-05,
"loss": 0.0316,
"step": 226500
},
{
"epoch": 3.42,
"learning_rate": 2.3156363526733896e-05,
"loss": 0.0307,
"step": 227000
},
{
"epoch": 3.43,
"learning_rate": 2.3141289437585736e-05,
"loss": 0.0316,
"step": 227500
},
{
"epoch": 3.44,
"learning_rate": 2.312621534843757e-05,
"loss": 0.0308,
"step": 228000
},
{
"epoch": 3.44,
"learning_rate": 2.3111141259289408e-05,
"loss": 0.0327,
"step": 228500
},
{
"epoch": 3.45,
"learning_rate": 2.3096067170141245e-05,
"loss": 0.032,
"step": 229000
},
{
"epoch": 3.46,
"learning_rate": 2.3080993080993083e-05,
"loss": 0.0324,
"step": 229500
},
{
"epoch": 3.47,
"learning_rate": 2.3065918991844917e-05,
"loss": 0.0315,
"step": 230000
},
{
"epoch": 3.47,
"learning_rate": 2.3050844902696758e-05,
"loss": 0.03,
"step": 230500
},
{
"epoch": 3.48,
"learning_rate": 2.303577081354859e-05,
"loss": 0.0331,
"step": 231000
},
{
"epoch": 3.49,
"learning_rate": 2.302069672440043e-05,
"loss": 0.0316,
"step": 231500
},
{
"epoch": 3.5,
"learning_rate": 2.3005622635252266e-05,
"loss": 0.0319,
"step": 232000
},
{
"epoch": 3.5,
"learning_rate": 2.2990548546104104e-05,
"loss": 0.0317,
"step": 232500
},
{
"epoch": 3.51,
"learning_rate": 2.2975474456955938e-05,
"loss": 0.033,
"step": 233000
},
{
"epoch": 3.52,
"learning_rate": 2.2960400367807775e-05,
"loss": 0.0328,
"step": 233500
},
{
"epoch": 3.53,
"learning_rate": 2.2945326278659613e-05,
"loss": 0.0341,
"step": 234000
},
{
"epoch": 3.53,
"learning_rate": 2.293025218951145e-05,
"loss": 0.0319,
"step": 234500
},
{
"epoch": 3.54,
"learning_rate": 2.2915178100363284e-05,
"loss": 0.0314,
"step": 235000
},
{
"epoch": 3.55,
"learning_rate": 2.2900104011215125e-05,
"loss": 0.0314,
"step": 235500
},
{
"epoch": 3.56,
"learning_rate": 2.288502992206696e-05,
"loss": 0.0314,
"step": 236000
},
{
"epoch": 3.57,
"learning_rate": 2.2869955832918796e-05,
"loss": 0.0309,
"step": 236500
},
{
"epoch": 3.57,
"learning_rate": 2.2854881743770634e-05,
"loss": 0.0323,
"step": 237000
},
{
"epoch": 3.58,
"learning_rate": 2.283980765462247e-05,
"loss": 0.0331,
"step": 237500
},
{
"epoch": 3.59,
"learning_rate": 2.2824733565474305e-05,
"loss": 0.029,
"step": 238000
},
{
"epoch": 3.6,
"learning_rate": 2.2809659476326146e-05,
"loss": 0.0331,
"step": 238500
},
{
"epoch": 3.6,
"learning_rate": 2.279458538717798e-05,
"loss": 0.0308,
"step": 239000
},
{
"epoch": 3.61,
"learning_rate": 2.2779511298029818e-05,
"loss": 0.0332,
"step": 239500
},
{
"epoch": 3.62,
"learning_rate": 2.276443720888165e-05,
"loss": 0.0322,
"step": 240000
},
{
"epoch": 3.63,
"learning_rate": 2.2749363119733492e-05,
"loss": 0.0315,
"step": 240500
},
{
"epoch": 3.63,
"learning_rate": 2.2734289030585326e-05,
"loss": 0.0314,
"step": 241000
},
{
"epoch": 3.64,
"learning_rate": 2.2719214941437164e-05,
"loss": 0.0314,
"step": 241500
},
{
"epoch": 3.65,
"learning_rate": 2.2704140852289e-05,
"loss": 0.0335,
"step": 242000
},
{
"epoch": 3.66,
"learning_rate": 2.268906676314084e-05,
"loss": 0.031,
"step": 242500
},
{
"epoch": 3.66,
"learning_rate": 2.2673992673992673e-05,
"loss": 0.0319,
"step": 243000
},
{
"epoch": 3.67,
"learning_rate": 2.2658918584844514e-05,
"loss": 0.0322,
"step": 243500
},
{
"epoch": 3.68,
"learning_rate": 2.2643844495696348e-05,
"loss": 0.031,
"step": 244000
},
{
"epoch": 3.69,
"learning_rate": 2.2628770406548185e-05,
"loss": 0.0318,
"step": 244500
},
{
"epoch": 3.69,
"learning_rate": 2.2613696317400022e-05,
"loss": 0.0297,
"step": 245000
},
{
"epoch": 3.7,
"learning_rate": 2.259862222825186e-05,
"loss": 0.031,
"step": 245500
},
{
"epoch": 3.71,
"learning_rate": 2.2583548139103694e-05,
"loss": 0.0298,
"step": 246000
},
{
"epoch": 3.72,
"learning_rate": 2.2568474049955535e-05,
"loss": 0.0323,
"step": 246500
},
{
"epoch": 3.72,
"learning_rate": 2.255339996080737e-05,
"loss": 0.032,
"step": 247000
},
{
"epoch": 3.73,
"learning_rate": 2.2538325871659206e-05,
"loss": 0.0309,
"step": 247500
},
{
"epoch": 3.74,
"learning_rate": 2.252325178251104e-05,
"loss": 0.0325,
"step": 248000
},
{
"epoch": 3.75,
"learning_rate": 2.250817769336288e-05,
"loss": 0.0315,
"step": 248500
},
{
"epoch": 3.75,
"learning_rate": 2.2493103604214715e-05,
"loss": 0.031,
"step": 249000
},
{
"epoch": 3.76,
"learning_rate": 2.2478029515066552e-05,
"loss": 0.0333,
"step": 249500
},
{
"epoch": 3.77,
"learning_rate": 2.246295542591839e-05,
"loss": 0.0322,
"step": 250000
},
{
"epoch": 3.78,
"learning_rate": 2.2447881336770227e-05,
"loss": 0.0308,
"step": 250500
},
{
"epoch": 3.78,
"learning_rate": 2.243280724762206e-05,
"loss": 0.0293,
"step": 251000
},
{
"epoch": 3.79,
"learning_rate": 2.2417733158473902e-05,
"loss": 0.033,
"step": 251500
},
{
"epoch": 3.8,
"learning_rate": 2.2402659069325736e-05,
"loss": 0.0325,
"step": 252000
},
{
"epoch": 3.81,
"learning_rate": 2.2387584980177574e-05,
"loss": 0.0318,
"step": 252500
},
{
"epoch": 3.81,
"learning_rate": 2.237251089102941e-05,
"loss": 0.0308,
"step": 253000
},
{
"epoch": 3.82,
"learning_rate": 2.235743680188125e-05,
"loss": 0.0339,
"step": 253500
},
{
"epoch": 3.83,
"learning_rate": 2.2342362712733082e-05,
"loss": 0.0306,
"step": 254000
},
{
"epoch": 3.84,
"learning_rate": 2.2327288623584923e-05,
"loss": 0.0314,
"step": 254500
},
{
"epoch": 3.84,
"learning_rate": 2.2312214534436757e-05,
"loss": 0.0307,
"step": 255000
},
{
"epoch": 3.85,
"learning_rate": 2.2297140445288595e-05,
"loss": 0.0311,
"step": 255500
},
{
"epoch": 3.86,
"learning_rate": 2.228206635614043e-05,
"loss": 0.0319,
"step": 256000
},
{
"epoch": 3.87,
"learning_rate": 2.226699226699227e-05,
"loss": 0.0314,
"step": 256500
},
{
"epoch": 3.87,
"learning_rate": 2.2251918177844104e-05,
"loss": 0.0312,
"step": 257000
},
{
"epoch": 3.88,
"learning_rate": 2.223684408869594e-05,
"loss": 0.0321,
"step": 257500
},
{
"epoch": 3.89,
"learning_rate": 2.222176999954778e-05,
"loss": 0.032,
"step": 258000
},
{
"epoch": 3.9,
"learning_rate": 2.2206695910399616e-05,
"loss": 0.0327,
"step": 258500
},
{
"epoch": 3.9,
"learning_rate": 2.219162182125145e-05,
"loss": 0.0298,
"step": 259000
},
{
"epoch": 3.91,
"learning_rate": 2.217654773210329e-05,
"loss": 0.0311,
"step": 259500
},
{
"epoch": 3.92,
"learning_rate": 2.2161473642955125e-05,
"loss": 0.0305,
"step": 260000
},
{
"epoch": 3.93,
"learning_rate": 2.2146399553806962e-05,
"loss": 0.0306,
"step": 260500
},
{
"epoch": 3.93,
"learning_rate": 2.21313254646588e-05,
"loss": 0.0311,
"step": 261000
},
{
"epoch": 3.94,
"learning_rate": 2.2116251375510637e-05,
"loss": 0.0336,
"step": 261500
},
{
"epoch": 3.95,
"learning_rate": 2.210117728636247e-05,
"loss": 0.0305,
"step": 262000
},
{
"epoch": 3.96,
"learning_rate": 2.208610319721431e-05,
"loss": 0.0312,
"step": 262500
},
{
"epoch": 3.96,
"learning_rate": 2.2071029108066146e-05,
"loss": 0.0325,
"step": 263000
},
{
"epoch": 3.97,
"learning_rate": 2.2055955018917983e-05,
"loss": 0.0321,
"step": 263500
},
{
"epoch": 3.98,
"learning_rate": 2.2040880929769817e-05,
"loss": 0.031,
"step": 264000
},
{
"epoch": 3.99,
"learning_rate": 2.2025806840621658e-05,
"loss": 0.0325,
"step": 264500
},
{
"epoch": 3.99,
"learning_rate": 2.2010732751473492e-05,
"loss": 0.0317,
"step": 265000
},
{
"epoch": 4.0,
"eval_accuracy": 0.9846929860097222,
"eval_f1": 0.929668210030934,
"eval_loss": 0.04334929585456848,
"eval_precision": 0.914498960760849,
"eval_recall": 0.9453491873591002,
"eval_runtime": 269.8313,
"eval_samples_per_second": 437.029,
"eval_steps_per_second": 27.317,
"step": 265356
},
{
"epoch": 4.0,
"learning_rate": 2.199565866232533e-05,
"loss": 0.0277,
"step": 265500
},
{
"epoch": 4.01,
"learning_rate": 2.1980584573177167e-05,
"loss": 0.0244,
"step": 266000
},
{
"epoch": 4.02,
"learning_rate": 2.1965510484029004e-05,
"loss": 0.0244,
"step": 266500
},
{
"epoch": 4.02,
"learning_rate": 2.195043639488084e-05,
"loss": 0.0252,
"step": 267000
},
{
"epoch": 4.03,
"learning_rate": 2.193536230573268e-05,
"loss": 0.0242,
"step": 267500
},
{
"epoch": 4.04,
"learning_rate": 2.1920288216584513e-05,
"loss": 0.0245,
"step": 268000
},
{
"epoch": 4.05,
"learning_rate": 2.190521412743635e-05,
"loss": 0.0253,
"step": 268500
},
{
"epoch": 4.05,
"learning_rate": 2.1890140038288188e-05,
"loss": 0.0246,
"step": 269000
},
{
"epoch": 4.06,
"learning_rate": 2.1875065949140026e-05,
"loss": 0.0255,
"step": 269500
},
{
"epoch": 4.07,
"learning_rate": 2.185999185999186e-05,
"loss": 0.0259,
"step": 270000
},
{
"epoch": 4.08,
"learning_rate": 2.1844917770843697e-05,
"loss": 0.0232,
"step": 270500
},
{
"epoch": 4.09,
"learning_rate": 2.1829843681695534e-05,
"loss": 0.0239,
"step": 271000
},
{
"epoch": 4.09,
"learning_rate": 2.1814769592547372e-05,
"loss": 0.0254,
"step": 271500
},
{
"epoch": 4.1,
"learning_rate": 2.1799695503399206e-05,
"loss": 0.0249,
"step": 272000
},
{
"epoch": 4.11,
"learning_rate": 2.1784621414251047e-05,
"loss": 0.0247,
"step": 272500
},
{
"epoch": 4.12,
"learning_rate": 2.176954732510288e-05,
"loss": 0.0253,
"step": 273000
},
{
"epoch": 4.12,
"learning_rate": 2.1754473235954718e-05,
"loss": 0.0259,
"step": 273500
},
{
"epoch": 4.13,
"learning_rate": 2.1739399146806556e-05,
"loss": 0.0258,
"step": 274000
},
{
"epoch": 4.14,
"learning_rate": 2.1724325057658393e-05,
"loss": 0.0247,
"step": 274500
},
{
"epoch": 4.15,
"learning_rate": 2.1709250968510227e-05,
"loss": 0.0271,
"step": 275000
},
{
"epoch": 4.15,
"learning_rate": 2.1694176879362068e-05,
"loss": 0.0253,
"step": 275500
},
{
"epoch": 4.16,
"learning_rate": 2.1679102790213902e-05,
"loss": 0.0258,
"step": 276000
},
{
"epoch": 4.17,
"learning_rate": 2.166402870106574e-05,
"loss": 0.0258,
"step": 276500
},
{
"epoch": 4.18,
"learning_rate": 2.1648954611917577e-05,
"loss": 0.0261,
"step": 277000
},
{
"epoch": 4.18,
"learning_rate": 2.1633880522769414e-05,
"loss": 0.0243,
"step": 277500
},
{
"epoch": 4.19,
"learning_rate": 2.1618806433621248e-05,
"loss": 0.0253,
"step": 278000
},
{
"epoch": 4.2,
"learning_rate": 2.1603732344473086e-05,
"loss": 0.0244,
"step": 278500
},
{
"epoch": 4.21,
"learning_rate": 2.1588658255324923e-05,
"loss": 0.0266,
"step": 279000
},
{
"epoch": 4.21,
"learning_rate": 2.157358416617676e-05,
"loss": 0.0251,
"step": 279500
},
{
"epoch": 4.22,
"learning_rate": 2.1558510077028595e-05,
"loss": 0.0242,
"step": 280000
},
{
"epoch": 4.23,
"learning_rate": 2.1543435987880435e-05,
"loss": 0.0264,
"step": 280500
},
{
"epoch": 4.24,
"learning_rate": 2.152836189873227e-05,
"loss": 0.025,
"step": 281000
},
{
"epoch": 4.24,
"learning_rate": 2.1513287809584107e-05,
"loss": 0.0257,
"step": 281500
},
{
"epoch": 4.25,
"learning_rate": 2.1498213720435944e-05,
"loss": 0.026,
"step": 282000
},
{
"epoch": 4.26,
"learning_rate": 2.148313963128778e-05,
"loss": 0.0274,
"step": 282500
},
{
"epoch": 4.27,
"learning_rate": 2.1468065542139616e-05,
"loss": 0.0264,
"step": 283000
},
{
"epoch": 4.27,
"learning_rate": 2.1452991452991456e-05,
"loss": 0.0239,
"step": 283500
},
{
"epoch": 4.28,
"learning_rate": 2.143791736384329e-05,
"loss": 0.0258,
"step": 284000
},
{
"epoch": 4.29,
"learning_rate": 2.1422843274695128e-05,
"loss": 0.0253,
"step": 284500
},
{
"epoch": 4.3,
"learning_rate": 2.1407769185546965e-05,
"loss": 0.026,
"step": 285000
},
{
"epoch": 4.3,
"learning_rate": 2.1392695096398803e-05,
"loss": 0.0253,
"step": 285500
},
{
"epoch": 4.31,
"learning_rate": 2.1377621007250637e-05,
"loss": 0.0255,
"step": 286000
},
{
"epoch": 4.32,
"learning_rate": 2.1362546918102474e-05,
"loss": 0.0283,
"step": 286500
},
{
"epoch": 4.33,
"learning_rate": 2.134747282895431e-05,
"loss": 0.0261,
"step": 287000
},
{
"epoch": 4.33,
"learning_rate": 2.133239873980615e-05,
"loss": 0.0253,
"step": 287500
},
{
"epoch": 4.34,
"learning_rate": 2.1317324650657983e-05,
"loss": 0.0271,
"step": 288000
},
{
"epoch": 4.35,
"learning_rate": 2.1302250561509824e-05,
"loss": 0.0252,
"step": 288500
},
{
"epoch": 4.36,
"learning_rate": 2.1287176472361658e-05,
"loss": 0.025,
"step": 289000
},
{
"epoch": 4.36,
"learning_rate": 2.1272102383213495e-05,
"loss": 0.0256,
"step": 289500
},
{
"epoch": 4.37,
"learning_rate": 2.1257028294065333e-05,
"loss": 0.0253,
"step": 290000
},
{
"epoch": 4.38,
"learning_rate": 2.124195420491717e-05,
"loss": 0.0259,
"step": 290500
},
{
"epoch": 4.39,
"learning_rate": 2.1226880115769004e-05,
"loss": 0.0271,
"step": 291000
},
{
"epoch": 4.39,
"learning_rate": 2.1211806026620845e-05,
"loss": 0.0272,
"step": 291500
},
{
"epoch": 4.4,
"learning_rate": 2.119673193747268e-05,
"loss": 0.0253,
"step": 292000
},
{
"epoch": 4.41,
"learning_rate": 2.1181657848324517e-05,
"loss": 0.0262,
"step": 292500
},
{
"epoch": 4.42,
"learning_rate": 2.1166583759176354e-05,
"loss": 0.0251,
"step": 293000
},
{
"epoch": 4.42,
"learning_rate": 2.115150967002819e-05,
"loss": 0.0264,
"step": 293500
},
{
"epoch": 4.43,
"learning_rate": 2.1136435580880025e-05,
"loss": 0.0258,
"step": 294000
},
{
"epoch": 4.44,
"learning_rate": 2.1121361491731863e-05,
"loss": 0.0251,
"step": 294500
},
{
"epoch": 4.45,
"learning_rate": 2.11062874025837e-05,
"loss": 0.0244,
"step": 295000
},
{
"epoch": 4.45,
"learning_rate": 2.1091213313435538e-05,
"loss": 0.026,
"step": 295500
},
{
"epoch": 4.46,
"learning_rate": 2.107613922428737e-05,
"loss": 0.0262,
"step": 296000
},
{
"epoch": 4.47,
"learning_rate": 2.1061065135139212e-05,
"loss": 0.0261,
"step": 296500
},
{
"epoch": 4.48,
"learning_rate": 2.1045991045991047e-05,
"loss": 0.0259,
"step": 297000
},
{
"epoch": 4.48,
"learning_rate": 2.1030916956842884e-05,
"loss": 0.0254,
"step": 297500
},
{
"epoch": 4.49,
"learning_rate": 2.101584286769472e-05,
"loss": 0.0268,
"step": 298000
},
{
"epoch": 4.5,
"learning_rate": 2.1000768778546555e-05,
"loss": 0.0276,
"step": 298500
},
{
"epoch": 4.51,
"learning_rate": 2.0985694689398393e-05,
"loss": 0.0259,
"step": 299000
},
{
"epoch": 4.51,
"learning_rate": 2.097062060025023e-05,
"loss": 0.0259,
"step": 299500
},
{
"epoch": 4.52,
"learning_rate": 2.0955546511102068e-05,
"loss": 0.0265,
"step": 300000
},
{
"epoch": 4.53,
"learning_rate": 2.0940472421953902e-05,
"loss": 0.0268,
"step": 300500
},
{
"epoch": 4.54,
"learning_rate": 2.092539833280574e-05,
"loss": 0.0253,
"step": 301000
},
{
"epoch": 4.54,
"learning_rate": 2.0910324243657577e-05,
"loss": 0.0258,
"step": 301500
},
{
"epoch": 4.55,
"learning_rate": 2.0895250154509414e-05,
"loss": 0.0262,
"step": 302000
},
{
"epoch": 4.56,
"learning_rate": 2.0880176065361248e-05,
"loss": 0.0272,
"step": 302500
},
{
"epoch": 4.57,
"learning_rate": 2.086510197621309e-05,
"loss": 0.0256,
"step": 303000
},
{
"epoch": 4.57,
"learning_rate": 2.0850027887064923e-05,
"loss": 0.0272,
"step": 303500
},
{
"epoch": 4.58,
"learning_rate": 2.083495379791676e-05,
"loss": 0.0257,
"step": 304000
},
{
"epoch": 4.59,
"learning_rate": 2.0819879708768598e-05,
"loss": 0.0267,
"step": 304500
},
{
"epoch": 4.6,
"learning_rate": 2.0804805619620435e-05,
"loss": 0.0275,
"step": 305000
},
{
"epoch": 4.61,
"learning_rate": 2.078973153047227e-05,
"loss": 0.0271,
"step": 305500
},
{
"epoch": 4.61,
"learning_rate": 2.077465744132411e-05,
"loss": 0.0272,
"step": 306000
},
{
"epoch": 4.62,
"learning_rate": 2.0759583352175944e-05,
"loss": 0.0263,
"step": 306500
},
{
"epoch": 4.63,
"learning_rate": 2.074450926302778e-05,
"loss": 0.0262,
"step": 307000
},
{
"epoch": 4.64,
"learning_rate": 2.072943517387962e-05,
"loss": 0.0261,
"step": 307500
},
{
"epoch": 4.64,
"learning_rate": 2.0714361084731456e-05,
"loss": 0.0252,
"step": 308000
},
{
"epoch": 4.65,
"learning_rate": 2.069928699558329e-05,
"loss": 0.0258,
"step": 308500
},
{
"epoch": 4.66,
"learning_rate": 2.0684212906435128e-05,
"loss": 0.0263,
"step": 309000
},
{
"epoch": 4.67,
"learning_rate": 2.0669138817286965e-05,
"loss": 0.0261,
"step": 309500
},
{
"epoch": 4.67,
"learning_rate": 2.0654064728138803e-05,
"loss": 0.0268,
"step": 310000
},
{
"epoch": 4.68,
"learning_rate": 2.0638990638990637e-05,
"loss": 0.0259,
"step": 310500
},
{
"epoch": 4.69,
"learning_rate": 2.0623916549842477e-05,
"loss": 0.0245,
"step": 311000
},
{
"epoch": 4.7,
"learning_rate": 2.060884246069431e-05,
"loss": 0.0255,
"step": 311500
},
{
"epoch": 4.7,
"learning_rate": 2.059376837154615e-05,
"loss": 0.0269,
"step": 312000
},
{
"epoch": 4.71,
"learning_rate": 2.0578694282397986e-05,
"loss": 0.0276,
"step": 312500
},
{
"epoch": 4.72,
"learning_rate": 2.0563620193249824e-05,
"loss": 0.025,
"step": 313000
},
{
"epoch": 4.73,
"learning_rate": 2.0548546104101658e-05,
"loss": 0.0257,
"step": 313500
},
{
"epoch": 4.73,
"learning_rate": 2.05334720149535e-05,
"loss": 0.0289,
"step": 314000
},
{
"epoch": 4.74,
"learning_rate": 2.0518397925805333e-05,
"loss": 0.0263,
"step": 314500
},
{
"epoch": 4.75,
"learning_rate": 2.050332383665717e-05,
"loss": 0.026,
"step": 315000
},
{
"epoch": 4.76,
"learning_rate": 2.0488249747509007e-05,
"loss": 0.0256,
"step": 315500
},
{
"epoch": 4.76,
"learning_rate": 2.0473175658360845e-05,
"loss": 0.0261,
"step": 316000
},
{
"epoch": 4.77,
"learning_rate": 2.045810156921268e-05,
"loss": 0.0271,
"step": 316500
},
{
"epoch": 4.78,
"learning_rate": 2.0443027480064516e-05,
"loss": 0.0259,
"step": 317000
},
{
"epoch": 4.79,
"learning_rate": 2.0427953390916354e-05,
"loss": 0.0256,
"step": 317500
},
{
"epoch": 4.79,
"learning_rate": 2.041287930176819e-05,
"loss": 0.0272,
"step": 318000
},
{
"epoch": 4.8,
"learning_rate": 2.0397805212620025e-05,
"loss": 0.0264,
"step": 318500
},
{
"epoch": 4.81,
"learning_rate": 2.0382731123471866e-05,
"loss": 0.0285,
"step": 319000
},
{
"epoch": 4.82,
"learning_rate": 2.03676570343237e-05,
"loss": 0.027,
"step": 319500
},
{
"epoch": 4.82,
"learning_rate": 2.0352582945175537e-05,
"loss": 0.0267,
"step": 320000
},
{
"epoch": 4.83,
"learning_rate": 2.0337508856027375e-05,
"loss": 0.0267,
"step": 320500
},
{
"epoch": 4.84,
"learning_rate": 2.0322434766879212e-05,
"loss": 0.0257,
"step": 321000
},
{
"epoch": 4.85,
"learning_rate": 2.0307360677731046e-05,
"loss": 0.0262,
"step": 321500
},
{
"epoch": 4.85,
"learning_rate": 2.0292286588582887e-05,
"loss": 0.0274,
"step": 322000
},
{
"epoch": 4.86,
"learning_rate": 2.027721249943472e-05,
"loss": 0.0275,
"step": 322500
},
{
"epoch": 4.87,
"learning_rate": 2.026213841028656e-05,
"loss": 0.0261,
"step": 323000
},
{
"epoch": 4.88,
"learning_rate": 2.0247064321138396e-05,
"loss": 0.0265,
"step": 323500
},
{
"epoch": 4.88,
"learning_rate": 2.0231990231990233e-05,
"loss": 0.0261,
"step": 324000
},
{
"epoch": 4.89,
"learning_rate": 2.0216916142842067e-05,
"loss": 0.027,
"step": 324500
},
{
"epoch": 4.9,
"learning_rate": 2.0201842053693905e-05,
"loss": 0.0263,
"step": 325000
},
{
"epoch": 4.91,
"learning_rate": 2.0186767964545742e-05,
"loss": 0.0267,
"step": 325500
},
{
"epoch": 4.91,
"learning_rate": 2.017169387539758e-05,
"loss": 0.0263,
"step": 326000
},
{
"epoch": 4.92,
"learning_rate": 2.0156619786249414e-05,
"loss": 0.026,
"step": 326500
},
{
"epoch": 4.93,
"learning_rate": 2.0141545697101255e-05,
"loss": 0.0266,
"step": 327000
},
{
"epoch": 4.94,
"learning_rate": 2.012647160795309e-05,
"loss": 0.0254,
"step": 327500
},
{
"epoch": 4.94,
"learning_rate": 2.0111397518804926e-05,
"loss": 0.0279,
"step": 328000
},
{
"epoch": 4.95,
"learning_rate": 2.0096323429656763e-05,
"loss": 0.0277,
"step": 328500
},
{
"epoch": 4.96,
"learning_rate": 2.00812493405086e-05,
"loss": 0.0262,
"step": 329000
},
{
"epoch": 4.97,
"learning_rate": 2.0066175251360435e-05,
"loss": 0.0273,
"step": 329500
},
{
"epoch": 4.97,
"learning_rate": 2.0051101162212276e-05,
"loss": 0.0263,
"step": 330000
},
{
"epoch": 4.98,
"learning_rate": 2.003602707306411e-05,
"loss": 0.0266,
"step": 330500
},
{
"epoch": 4.99,
"learning_rate": 2.0020952983915947e-05,
"loss": 0.0262,
"step": 331000
},
{
"epoch": 5.0,
"learning_rate": 2.000587889476778e-05,
"loss": 0.0257,
"step": 331500
},
{
"epoch": 5.0,
"eval_accuracy": 0.9854380991780325,
"eval_f1": 0.9340379855331353,
"eval_loss": 0.04560817405581474,
"eval_precision": 0.9195563196454343,
"eval_recall": 0.9489830798154162,
"eval_runtime": 243.78,
"eval_samples_per_second": 483.731,
"eval_steps_per_second": 30.236,
"step": 331695
},
{
"epoch": 5.0,
"learning_rate": 1.9990804805619622e-05,
"loss": 0.0227,
"step": 332000
},
{
"epoch": 5.01,
"learning_rate": 1.9975730716471456e-05,
"loss": 0.0196,
"step": 332500
},
{
"epoch": 5.02,
"learning_rate": 1.9960656627323293e-05,
"loss": 0.0191,
"step": 333000
},
{
"epoch": 5.03,
"learning_rate": 1.994558253817513e-05,
"loss": 0.0217,
"step": 333500
},
{
"epoch": 5.03,
"learning_rate": 1.993050844902697e-05,
"loss": 0.0207,
"step": 334000
},
{
"epoch": 5.04,
"learning_rate": 1.9915434359878802e-05,
"loss": 0.021,
"step": 334500
},
{
"epoch": 5.05,
"learning_rate": 1.9900360270730643e-05,
"loss": 0.0202,
"step": 335000
},
{
"epoch": 5.06,
"learning_rate": 1.9885286181582477e-05,
"loss": 0.0207,
"step": 335500
},
{
"epoch": 5.06,
"learning_rate": 1.9870212092434315e-05,
"loss": 0.0202,
"step": 336000
},
{
"epoch": 5.07,
"learning_rate": 1.9855138003286152e-05,
"loss": 0.0208,
"step": 336500
},
{
"epoch": 5.08,
"learning_rate": 1.984006391413799e-05,
"loss": 0.021,
"step": 337000
},
{
"epoch": 5.09,
"learning_rate": 1.9824989824989823e-05,
"loss": 0.0198,
"step": 337500
},
{
"epoch": 5.1,
"learning_rate": 1.9809915735841664e-05,
"loss": 0.0204,
"step": 338000
},
{
"epoch": 5.1,
"learning_rate": 1.97948416466935e-05,
"loss": 0.0204,
"step": 338500
},
{
"epoch": 5.11,
"learning_rate": 1.9779767557545336e-05,
"loss": 0.0207,
"step": 339000
},
{
"epoch": 5.12,
"learning_rate": 1.976469346839717e-05,
"loss": 0.0203,
"step": 339500
},
{
"epoch": 5.13,
"learning_rate": 1.974961937924901e-05,
"loss": 0.021,
"step": 340000
},
{
"epoch": 5.13,
"learning_rate": 1.9734545290100845e-05,
"loss": 0.0205,
"step": 340500
},
{
"epoch": 5.14,
"learning_rate": 1.9719471200952682e-05,
"loss": 0.0204,
"step": 341000
},
{
"epoch": 5.15,
"learning_rate": 1.970439711180452e-05,
"loss": 0.0202,
"step": 341500
},
{
"epoch": 5.16,
"learning_rate": 1.9689323022656357e-05,
"loss": 0.0206,
"step": 342000
},
{
"epoch": 5.16,
"learning_rate": 1.967424893350819e-05,
"loss": 0.0216,
"step": 342500
},
{
"epoch": 5.17,
"learning_rate": 1.9659174844360032e-05,
"loss": 0.0214,
"step": 343000
},
{
"epoch": 5.18,
"learning_rate": 1.9644100755211866e-05,
"loss": 0.022,
"step": 343500
},
{
"epoch": 5.19,
"learning_rate": 1.9629026666063703e-05,
"loss": 0.0211,
"step": 344000
},
{
"epoch": 5.19,
"learning_rate": 1.961395257691554e-05,
"loss": 0.0208,
"step": 344500
},
{
"epoch": 5.2,
"learning_rate": 1.9598878487767378e-05,
"loss": 0.0216,
"step": 345000
},
{
"epoch": 5.21,
"learning_rate": 1.9583804398619212e-05,
"loss": 0.0228,
"step": 345500
},
{
"epoch": 5.22,
"learning_rate": 1.9568730309471053e-05,
"loss": 0.0218,
"step": 346000
},
{
"epoch": 5.22,
"learning_rate": 1.9553656220322887e-05,
"loss": 0.0237,
"step": 346500
},
{
"epoch": 5.23,
"learning_rate": 1.9538582131174724e-05,
"loss": 0.0214,
"step": 347000
},
{
"epoch": 5.24,
"learning_rate": 1.952350804202656e-05,
"loss": 0.0223,
"step": 347500
},
{
"epoch": 5.25,
"learning_rate": 1.95084339528784e-05,
"loss": 0.0226,
"step": 348000
},
{
"epoch": 5.25,
"learning_rate": 1.9493359863730233e-05,
"loss": 0.0219,
"step": 348500
},
{
"epoch": 5.26,
"learning_rate": 1.947828577458207e-05,
"loss": 0.0235,
"step": 349000
},
{
"epoch": 5.27,
"learning_rate": 1.9463211685433908e-05,
"loss": 0.0201,
"step": 349500
},
{
"epoch": 5.28,
"learning_rate": 1.9448137596285745e-05,
"loss": 0.0216,
"step": 350000
},
{
"epoch": 5.28,
"learning_rate": 1.943306350713758e-05,
"loss": 0.0218,
"step": 350500
},
{
"epoch": 5.29,
"learning_rate": 1.941798941798942e-05,
"loss": 0.0219,
"step": 351000
},
{
"epoch": 5.3,
"learning_rate": 1.9402915328841254e-05,
"loss": 0.022,
"step": 351500
},
{
"epoch": 5.31,
"learning_rate": 1.9387841239693092e-05,
"loss": 0.0231,
"step": 352000
},
{
"epoch": 5.31,
"learning_rate": 1.937276715054493e-05,
"loss": 0.0216,
"step": 352500
},
{
"epoch": 5.32,
"learning_rate": 1.9357693061396767e-05,
"loss": 0.0205,
"step": 353000
},
{
"epoch": 5.33,
"learning_rate": 1.93426189722486e-05,
"loss": 0.0212,
"step": 353500
},
{
"epoch": 5.34,
"learning_rate": 1.9327544883100438e-05,
"loss": 0.021,
"step": 354000
},
{
"epoch": 5.34,
"learning_rate": 1.9312470793952276e-05,
"loss": 0.0227,
"step": 354500
},
{
"epoch": 5.35,
"learning_rate": 1.9297396704804113e-05,
"loss": 0.0233,
"step": 355000
},
{
"epoch": 5.36,
"learning_rate": 1.9282322615655947e-05,
"loss": 0.0232,
"step": 355500
},
{
"epoch": 5.37,
"learning_rate": 1.9267248526507788e-05,
"loss": 0.0214,
"step": 356000
},
{
"epoch": 5.37,
"learning_rate": 1.9252174437359622e-05,
"loss": 0.0213,
"step": 356500
},
{
"epoch": 5.38,
"learning_rate": 1.923710034821146e-05,
"loss": 0.0216,
"step": 357000
},
{
"epoch": 5.39,
"learning_rate": 1.9222026259063297e-05,
"loss": 0.0224,
"step": 357500
},
{
"epoch": 5.4,
"learning_rate": 1.9206952169915134e-05,
"loss": 0.0218,
"step": 358000
},
{
"epoch": 5.4,
"learning_rate": 1.9191878080766968e-05,
"loss": 0.0216,
"step": 358500
},
{
"epoch": 5.41,
"learning_rate": 1.917680399161881e-05,
"loss": 0.0221,
"step": 359000
},
{
"epoch": 5.42,
"learning_rate": 1.9161729902470643e-05,
"loss": 0.0226,
"step": 359500
},
{
"epoch": 5.43,
"learning_rate": 1.914665581332248e-05,
"loss": 0.0225,
"step": 360000
},
{
"epoch": 5.43,
"learning_rate": 1.9131581724174318e-05,
"loss": 0.0216,
"step": 360500
},
{
"epoch": 5.44,
"learning_rate": 1.9116507635026155e-05,
"loss": 0.0218,
"step": 361000
},
{
"epoch": 5.45,
"learning_rate": 1.910143354587799e-05,
"loss": 0.0232,
"step": 361500
},
{
"epoch": 5.46,
"learning_rate": 1.9086359456729827e-05,
"loss": 0.0216,
"step": 362000
},
{
"epoch": 5.46,
"learning_rate": 1.9071285367581664e-05,
"loss": 0.0216,
"step": 362500
},
{
"epoch": 5.47,
"learning_rate": 1.90562112784335e-05,
"loss": 0.0213,
"step": 363000
},
{
"epoch": 5.48,
"learning_rate": 1.9041137189285336e-05,
"loss": 0.0217,
"step": 363500
},
{
"epoch": 5.49,
"learning_rate": 1.9026063100137176e-05,
"loss": 0.0225,
"step": 364000
},
{
"epoch": 5.49,
"learning_rate": 1.901098901098901e-05,
"loss": 0.0216,
"step": 364500
},
{
"epoch": 5.5,
"learning_rate": 1.8995914921840848e-05,
"loss": 0.0224,
"step": 365000
},
{
"epoch": 5.51,
"learning_rate": 1.8980840832692685e-05,
"loss": 0.0218,
"step": 365500
},
{
"epoch": 5.52,
"learning_rate": 1.8965766743544523e-05,
"loss": 0.0237,
"step": 366000
},
{
"epoch": 5.52,
"learning_rate": 1.8950692654396357e-05,
"loss": 0.0216,
"step": 366500
},
{
"epoch": 5.53,
"learning_rate": 1.8935618565248198e-05,
"loss": 0.0213,
"step": 367000
},
{
"epoch": 5.54,
"learning_rate": 1.892054447610003e-05,
"loss": 0.0227,
"step": 367500
},
{
"epoch": 5.55,
"learning_rate": 1.890547038695187e-05,
"loss": 0.0226,
"step": 368000
},
{
"epoch": 5.55,
"learning_rate": 1.8890396297803706e-05,
"loss": 0.0223,
"step": 368500
},
{
"epoch": 5.56,
"learning_rate": 1.8875322208655544e-05,
"loss": 0.0216,
"step": 369000
},
{
"epoch": 5.57,
"learning_rate": 1.8860248119507378e-05,
"loss": 0.0214,
"step": 369500
},
{
"epoch": 5.58,
"learning_rate": 1.8845174030359215e-05,
"loss": 0.0227,
"step": 370000
},
{
"epoch": 5.58,
"learning_rate": 1.8830099941211053e-05,
"loss": 0.0223,
"step": 370500
},
{
"epoch": 5.59,
"learning_rate": 1.881502585206289e-05,
"loss": 0.0221,
"step": 371000
},
{
"epoch": 5.6,
"learning_rate": 1.8799951762914724e-05,
"loss": 0.0224,
"step": 371500
},
{
"epoch": 5.61,
"learning_rate": 1.8784877673766565e-05,
"loss": 0.0221,
"step": 372000
},
{
"epoch": 5.62,
"learning_rate": 1.87698035846184e-05,
"loss": 0.0221,
"step": 372500
},
{
"epoch": 5.62,
"learning_rate": 1.8754729495470236e-05,
"loss": 0.022,
"step": 373000
},
{
"epoch": 5.63,
"learning_rate": 1.8739655406322074e-05,
"loss": 0.0224,
"step": 373500
},
{
"epoch": 5.64,
"learning_rate": 1.872458131717391e-05,
"loss": 0.023,
"step": 374000
},
{
"epoch": 5.65,
"learning_rate": 1.8709507228025745e-05,
"loss": 0.0231,
"step": 374500
},
{
"epoch": 5.65,
"learning_rate": 1.8694433138877586e-05,
"loss": 0.0247,
"step": 375000
},
{
"epoch": 5.66,
"learning_rate": 1.867935904972942e-05,
"loss": 0.0226,
"step": 375500
},
{
"epoch": 5.67,
"learning_rate": 1.8664284960581258e-05,
"loss": 0.0215,
"step": 376000
},
{
"epoch": 5.68,
"learning_rate": 1.8649210871433095e-05,
"loss": 0.023,
"step": 376500
},
{
"epoch": 5.68,
"learning_rate": 1.8634136782284932e-05,
"loss": 0.0233,
"step": 377000
},
{
"epoch": 5.69,
"learning_rate": 1.8619062693136766e-05,
"loss": 0.0225,
"step": 377500
},
{
"epoch": 5.7,
"learning_rate": 1.8603988603988604e-05,
"loss": 0.0231,
"step": 378000
},
{
"epoch": 5.71,
"learning_rate": 1.858891451484044e-05,
"loss": 0.0225,
"step": 378500
},
{
"epoch": 5.71,
"learning_rate": 1.857384042569228e-05,
"loss": 0.0237,
"step": 379000
},
{
"epoch": 5.72,
"learning_rate": 1.8558766336544113e-05,
"loss": 0.022,
"step": 379500
},
{
"epoch": 5.73,
"learning_rate": 1.8543692247395954e-05,
"loss": 0.0223,
"step": 380000
},
{
"epoch": 5.74,
"learning_rate": 1.8528618158247788e-05,
"loss": 0.0218,
"step": 380500
},
{
"epoch": 5.74,
"learning_rate": 1.8513544069099625e-05,
"loss": 0.0243,
"step": 381000
},
{
"epoch": 5.75,
"learning_rate": 1.8498469979951462e-05,
"loss": 0.0218,
"step": 381500
},
{
"epoch": 5.76,
"learning_rate": 1.84833958908033e-05,
"loss": 0.0229,
"step": 382000
},
{
"epoch": 5.77,
"learning_rate": 1.8468321801655134e-05,
"loss": 0.0226,
"step": 382500
},
{
"epoch": 5.77,
"learning_rate": 1.8453247712506975e-05,
"loss": 0.0207,
"step": 383000
},
{
"epoch": 5.78,
"learning_rate": 1.843817362335881e-05,
"loss": 0.0217,
"step": 383500
},
{
"epoch": 5.79,
"learning_rate": 1.8423099534210646e-05,
"loss": 0.0218,
"step": 384000
},
{
"epoch": 5.8,
"learning_rate": 1.840802544506248e-05,
"loss": 0.0225,
"step": 384500
},
{
"epoch": 5.8,
"learning_rate": 1.839295135591432e-05,
"loss": 0.0233,
"step": 385000
},
{
"epoch": 5.81,
"learning_rate": 1.8377877266766155e-05,
"loss": 0.0211,
"step": 385500
},
{
"epoch": 5.82,
"learning_rate": 1.8362803177617992e-05,
"loss": 0.0213,
"step": 386000
},
{
"epoch": 5.83,
"learning_rate": 1.834772908846983e-05,
"loss": 0.0233,
"step": 386500
},
{
"epoch": 5.83,
"learning_rate": 1.8332654999321667e-05,
"loss": 0.0221,
"step": 387000
},
{
"epoch": 5.84,
"learning_rate": 1.83175809101735e-05,
"loss": 0.0224,
"step": 387500
},
{
"epoch": 5.85,
"learning_rate": 1.8302506821025342e-05,
"loss": 0.0228,
"step": 388000
},
{
"epoch": 5.86,
"learning_rate": 1.8287432731877176e-05,
"loss": 0.0209,
"step": 388500
},
{
"epoch": 5.86,
"learning_rate": 1.8272358642729014e-05,
"loss": 0.0224,
"step": 389000
},
{
"epoch": 5.87,
"learning_rate": 1.825728455358085e-05,
"loss": 0.0232,
"step": 389500
},
{
"epoch": 5.88,
"learning_rate": 1.824221046443269e-05,
"loss": 0.0224,
"step": 390000
},
{
"epoch": 5.89,
"learning_rate": 1.8227136375284522e-05,
"loss": 0.022,
"step": 390500
},
{
"epoch": 5.89,
"learning_rate": 1.8212062286136363e-05,
"loss": 0.0222,
"step": 391000
},
{
"epoch": 5.9,
"learning_rate": 1.8196988196988197e-05,
"loss": 0.0218,
"step": 391500
},
{
"epoch": 5.91,
"learning_rate": 1.8181914107840035e-05,
"loss": 0.0205,
"step": 392000
},
{
"epoch": 5.92,
"learning_rate": 1.816684001869187e-05,
"loss": 0.0225,
"step": 392500
},
{
"epoch": 5.92,
"learning_rate": 1.815176592954371e-05,
"loss": 0.0221,
"step": 393000
},
{
"epoch": 5.93,
"learning_rate": 1.8136691840395544e-05,
"loss": 0.0198,
"step": 393500
},
{
"epoch": 5.94,
"learning_rate": 1.812161775124738e-05,
"loss": 0.0217,
"step": 394000
},
{
"epoch": 5.95,
"learning_rate": 1.810654366209922e-05,
"loss": 0.0207,
"step": 394500
},
{
"epoch": 5.95,
"learning_rate": 1.8091469572951056e-05,
"loss": 0.0229,
"step": 395000
},
{
"epoch": 5.96,
"learning_rate": 1.807639548380289e-05,
"loss": 0.0246,
"step": 395500
},
{
"epoch": 5.97,
"learning_rate": 1.806132139465473e-05,
"loss": 0.0222,
"step": 396000
},
{
"epoch": 5.98,
"learning_rate": 1.8046247305506565e-05,
"loss": 0.0217,
"step": 396500
},
{
"epoch": 5.98,
"learning_rate": 1.8031173216358402e-05,
"loss": 0.024,
"step": 397000
},
{
"epoch": 5.99,
"learning_rate": 1.801609912721024e-05,
"loss": 0.0212,
"step": 397500
},
{
"epoch": 6.0,
"learning_rate": 1.8001025038062077e-05,
"loss": 0.0215,
"step": 398000
},
{
"epoch": 6.0,
"eval_accuracy": 0.9862980377553815,
"eval_f1": 0.9387226916343153,
"eval_loss": 0.04899383336305618,
"eval_precision": 0.9244231922482088,
"eval_recall": 0.9534715270486933,
"eval_runtime": 249.5171,
"eval_samples_per_second": 472.609,
"eval_steps_per_second": 29.541,
"step": 398034
},
{
"epoch": 6.01,
"learning_rate": 1.798595094891391e-05,
"loss": 0.0174,
"step": 398500
},
{
"epoch": 6.01,
"learning_rate": 1.7970876859765752e-05,
"loss": 0.0181,
"step": 399000
},
{
"epoch": 6.02,
"learning_rate": 1.7955802770617586e-05,
"loss": 0.0163,
"step": 399500
},
{
"epoch": 6.03,
"learning_rate": 1.7940728681469423e-05,
"loss": 0.0163,
"step": 400000
},
{
"epoch": 6.04,
"learning_rate": 1.7925654592321257e-05,
"loss": 0.0166,
"step": 400500
},
{
"epoch": 6.04,
"learning_rate": 1.7910580503173098e-05,
"loss": 0.0174,
"step": 401000
},
{
"epoch": 6.05,
"learning_rate": 1.7895506414024932e-05,
"loss": 0.0177,
"step": 401500
},
{
"epoch": 6.06,
"learning_rate": 1.788043232487677e-05,
"loss": 0.0175,
"step": 402000
},
{
"epoch": 6.07,
"learning_rate": 1.7865358235728607e-05,
"loss": 0.0176,
"step": 402500
},
{
"epoch": 6.07,
"learning_rate": 1.7850284146580444e-05,
"loss": 0.0171,
"step": 403000
},
{
"epoch": 6.08,
"learning_rate": 1.783521005743228e-05,
"loss": 0.0184,
"step": 403500
},
{
"epoch": 6.09,
"learning_rate": 1.782013596828412e-05,
"loss": 0.0172,
"step": 404000
},
{
"epoch": 6.1,
"learning_rate": 1.7805061879135953e-05,
"loss": 0.0179,
"step": 404500
},
{
"epoch": 6.11,
"learning_rate": 1.778998778998779e-05,
"loss": 0.0171,
"step": 405000
},
{
"epoch": 6.11,
"learning_rate": 1.7774913700839628e-05,
"loss": 0.0173,
"step": 405500
},
{
"epoch": 6.12,
"learning_rate": 1.7759839611691466e-05,
"loss": 0.018,
"step": 406000
},
{
"epoch": 6.13,
"learning_rate": 1.77447655225433e-05,
"loss": 0.0178,
"step": 406500
},
{
"epoch": 6.14,
"learning_rate": 1.772969143339514e-05,
"loss": 0.0174,
"step": 407000
},
{
"epoch": 6.14,
"learning_rate": 1.7714617344246974e-05,
"loss": 0.0188,
"step": 407500
},
{
"epoch": 6.15,
"learning_rate": 1.7699543255098812e-05,
"loss": 0.0182,
"step": 408000
},
{
"epoch": 6.16,
"learning_rate": 1.7684469165950646e-05,
"loss": 0.0177,
"step": 408500
},
{
"epoch": 6.17,
"learning_rate": 1.7669395076802487e-05,
"loss": 0.0182,
"step": 409000
},
{
"epoch": 6.17,
"learning_rate": 1.765432098765432e-05,
"loss": 0.0191,
"step": 409500
},
{
"epoch": 6.18,
"learning_rate": 1.7639246898506158e-05,
"loss": 0.0192,
"step": 410000
},
{
"epoch": 6.19,
"learning_rate": 1.7624172809357996e-05,
"loss": 0.0176,
"step": 410500
},
{
"epoch": 6.2,
"learning_rate": 1.7609098720209833e-05,
"loss": 0.0165,
"step": 411000
},
{
"epoch": 6.2,
"learning_rate": 1.7594024631061667e-05,
"loss": 0.017,
"step": 411500
},
{
"epoch": 6.21,
"learning_rate": 1.7578950541913508e-05,
"loss": 0.017,
"step": 412000
},
{
"epoch": 6.22,
"learning_rate": 1.7563876452765342e-05,
"loss": 0.0174,
"step": 412500
},
{
"epoch": 6.23,
"learning_rate": 1.754880236361718e-05,
"loss": 0.0191,
"step": 413000
},
{
"epoch": 6.23,
"learning_rate": 1.7533728274469017e-05,
"loss": 0.017,
"step": 413500
},
{
"epoch": 6.24,
"learning_rate": 1.7518654185320854e-05,
"loss": 0.0176,
"step": 414000
},
{
"epoch": 6.25,
"learning_rate": 1.7503580096172688e-05,
"loss": 0.0171,
"step": 414500
},
{
"epoch": 6.26,
"learning_rate": 1.7488506007024526e-05,
"loss": 0.018,
"step": 415000
},
{
"epoch": 6.26,
"learning_rate": 1.7473431917876363e-05,
"loss": 0.0189,
"step": 415500
},
{
"epoch": 6.27,
"learning_rate": 1.74583578287282e-05,
"loss": 0.0198,
"step": 416000
},
{
"epoch": 6.28,
"learning_rate": 1.7443283739580035e-05,
"loss": 0.0193,
"step": 416500
},
{
"epoch": 6.29,
"learning_rate": 1.7428209650431875e-05,
"loss": 0.0177,
"step": 417000
},
{
"epoch": 6.29,
"learning_rate": 1.741313556128371e-05,
"loss": 0.0182,
"step": 417500
},
{
"epoch": 6.3,
"learning_rate": 1.7398061472135547e-05,
"loss": 0.0183,
"step": 418000
},
{
"epoch": 6.31,
"learning_rate": 1.7382987382987384e-05,
"loss": 0.0178,
"step": 418500
},
{
"epoch": 6.32,
"learning_rate": 1.736791329383922e-05,
"loss": 0.0175,
"step": 419000
},
{
"epoch": 6.32,
"learning_rate": 1.7352839204691056e-05,
"loss": 0.018,
"step": 419500
},
{
"epoch": 6.33,
"learning_rate": 1.7337765115542896e-05,
"loss": 0.0186,
"step": 420000
},
{
"epoch": 6.34,
"learning_rate": 1.732269102639473e-05,
"loss": 0.017,
"step": 420500
},
{
"epoch": 6.35,
"learning_rate": 1.7307616937246568e-05,
"loss": 0.0173,
"step": 421000
},
{
"epoch": 6.35,
"learning_rate": 1.7292542848098405e-05,
"loss": 0.0185,
"step": 421500
},
{
"epoch": 6.36,
"learning_rate": 1.7277468758950243e-05,
"loss": 0.0177,
"step": 422000
},
{
"epoch": 6.37,
"learning_rate": 1.7262394669802077e-05,
"loss": 0.0196,
"step": 422500
},
{
"epoch": 6.38,
"learning_rate": 1.7247320580653914e-05,
"loss": 0.0189,
"step": 423000
},
{
"epoch": 6.38,
"learning_rate": 1.723224649150575e-05,
"loss": 0.0189,
"step": 423500
},
{
"epoch": 6.39,
"learning_rate": 1.721717240235759e-05,
"loss": 0.0199,
"step": 424000
},
{
"epoch": 6.4,
"learning_rate": 1.7202098313209423e-05,
"loss": 0.0188,
"step": 424500
},
{
"epoch": 6.41,
"learning_rate": 1.7187024224061264e-05,
"loss": 0.0189,
"step": 425000
},
{
"epoch": 6.41,
"learning_rate": 1.7171950134913098e-05,
"loss": 0.0182,
"step": 425500
},
{
"epoch": 6.42,
"learning_rate": 1.7156876045764935e-05,
"loss": 0.0182,
"step": 426000
},
{
"epoch": 6.43,
"learning_rate": 1.7141801956616773e-05,
"loss": 0.0186,
"step": 426500
},
{
"epoch": 6.44,
"learning_rate": 1.712672786746861e-05,
"loss": 0.0196,
"step": 427000
},
{
"epoch": 6.44,
"learning_rate": 1.7111653778320444e-05,
"loss": 0.0178,
"step": 427500
},
{
"epoch": 6.45,
"learning_rate": 1.7096579689172285e-05,
"loss": 0.0187,
"step": 428000
},
{
"epoch": 6.46,
"learning_rate": 1.708150560002412e-05,
"loss": 0.0189,
"step": 428500
},
{
"epoch": 6.47,
"learning_rate": 1.7066431510875957e-05,
"loss": 0.0191,
"step": 429000
},
{
"epoch": 6.47,
"learning_rate": 1.7051357421727794e-05,
"loss": 0.0188,
"step": 429500
},
{
"epoch": 6.48,
"learning_rate": 1.703628333257963e-05,
"loss": 0.0199,
"step": 430000
},
{
"epoch": 6.49,
"learning_rate": 1.7021209243431465e-05,
"loss": 0.0176,
"step": 430500
},
{
"epoch": 6.5,
"learning_rate": 1.7006135154283303e-05,
"loss": 0.0187,
"step": 431000
},
{
"epoch": 6.5,
"learning_rate": 1.699106106513514e-05,
"loss": 0.0186,
"step": 431500
},
{
"epoch": 6.51,
"learning_rate": 1.6975986975986978e-05,
"loss": 0.0185,
"step": 432000
},
{
"epoch": 6.52,
"learning_rate": 1.6960912886838812e-05,
"loss": 0.0177,
"step": 432500
},
{
"epoch": 6.53,
"learning_rate": 1.6945838797690653e-05,
"loss": 0.0187,
"step": 433000
},
{
"epoch": 6.53,
"learning_rate": 1.6930764708542487e-05,
"loss": 0.0192,
"step": 433500
},
{
"epoch": 6.54,
"learning_rate": 1.6915690619394324e-05,
"loss": 0.0186,
"step": 434000
},
{
"epoch": 6.55,
"learning_rate": 1.690061653024616e-05,
"loss": 0.0187,
"step": 434500
},
{
"epoch": 6.56,
"learning_rate": 1.6885542441098e-05,
"loss": 0.0189,
"step": 435000
},
{
"epoch": 6.56,
"learning_rate": 1.6870468351949833e-05,
"loss": 0.0193,
"step": 435500
},
{
"epoch": 6.57,
"learning_rate": 1.6855394262801674e-05,
"loss": 0.0173,
"step": 436000
},
{
"epoch": 6.58,
"learning_rate": 1.6840320173653508e-05,
"loss": 0.0192,
"step": 436500
},
{
"epoch": 6.59,
"learning_rate": 1.6825246084505345e-05,
"loss": 0.0195,
"step": 437000
},
{
"epoch": 6.59,
"learning_rate": 1.6810171995357183e-05,
"loss": 0.0173,
"step": 437500
},
{
"epoch": 6.6,
"learning_rate": 1.679509790620902e-05,
"loss": 0.0196,
"step": 438000
},
{
"epoch": 6.61,
"learning_rate": 1.6780023817060854e-05,
"loss": 0.019,
"step": 438500
},
{
"epoch": 6.62,
"learning_rate": 1.676494972791269e-05,
"loss": 0.0174,
"step": 439000
},
{
"epoch": 6.63,
"learning_rate": 1.674987563876453e-05,
"loss": 0.0183,
"step": 439500
},
{
"epoch": 6.63,
"learning_rate": 1.6734801549616366e-05,
"loss": 0.0179,
"step": 440000
},
{
"epoch": 6.64,
"learning_rate": 1.67197274604682e-05,
"loss": 0.0198,
"step": 440500
},
{
"epoch": 6.65,
"learning_rate": 1.670465337132004e-05,
"loss": 0.0193,
"step": 441000
},
{
"epoch": 6.66,
"learning_rate": 1.6689579282171875e-05,
"loss": 0.0187,
"step": 441500
},
{
"epoch": 6.66,
"learning_rate": 1.6674505193023713e-05,
"loss": 0.0204,
"step": 442000
},
{
"epoch": 6.67,
"learning_rate": 1.665943110387555e-05,
"loss": 0.0192,
"step": 442500
},
{
"epoch": 6.68,
"learning_rate": 1.6644357014727387e-05,
"loss": 0.0177,
"step": 443000
},
{
"epoch": 6.69,
"learning_rate": 1.662928292557922e-05,
"loss": 0.0182,
"step": 443500
},
{
"epoch": 6.69,
"learning_rate": 1.6614208836431062e-05,
"loss": 0.0196,
"step": 444000
},
{
"epoch": 6.7,
"learning_rate": 1.6599134747282896e-05,
"loss": 0.0195,
"step": 444500
},
{
"epoch": 6.71,
"learning_rate": 1.6584060658134734e-05,
"loss": 0.0178,
"step": 445000
},
{
"epoch": 6.72,
"learning_rate": 1.6568986568986568e-05,
"loss": 0.0196,
"step": 445500
},
{
"epoch": 6.72,
"learning_rate": 1.655391247983841e-05,
"loss": 0.0197,
"step": 446000
},
{
"epoch": 6.73,
"learning_rate": 1.6538838390690243e-05,
"loss": 0.0187,
"step": 446500
},
{
"epoch": 6.74,
"learning_rate": 1.652376430154208e-05,
"loss": 0.0192,
"step": 447000
},
{
"epoch": 6.75,
"learning_rate": 1.6508690212393917e-05,
"loss": 0.0194,
"step": 447500
},
{
"epoch": 6.75,
"learning_rate": 1.649361612324575e-05,
"loss": 0.0198,
"step": 448000
},
{
"epoch": 6.76,
"learning_rate": 1.647854203409759e-05,
"loss": 0.0194,
"step": 448500
},
{
"epoch": 6.77,
"learning_rate": 1.6463467944949426e-05,
"loss": 0.0187,
"step": 449000
},
{
"epoch": 6.78,
"learning_rate": 1.6448393855801264e-05,
"loss": 0.02,
"step": 449500
},
{
"epoch": 6.78,
"learning_rate": 1.6433319766653098e-05,
"loss": 0.0199,
"step": 450000
},
{
"epoch": 6.79,
"learning_rate": 1.641824567750494e-05,
"loss": 0.0185,
"step": 450500
},
{
"epoch": 6.8,
"learning_rate": 1.6403171588356773e-05,
"loss": 0.0204,
"step": 451000
},
{
"epoch": 6.81,
"learning_rate": 1.638809749920861e-05,
"loss": 0.0181,
"step": 451500
},
{
"epoch": 6.81,
"learning_rate": 1.6373023410060447e-05,
"loss": 0.0194,
"step": 452000
},
{
"epoch": 6.82,
"learning_rate": 1.6357949320912285e-05,
"loss": 0.0178,
"step": 452500
},
{
"epoch": 6.83,
"learning_rate": 1.634287523176412e-05,
"loss": 0.0186,
"step": 453000
},
{
"epoch": 6.84,
"learning_rate": 1.6327801142615956e-05,
"loss": 0.0187,
"step": 453500
},
{
"epoch": 6.84,
"learning_rate": 1.6312727053467794e-05,
"loss": 0.0192,
"step": 454000
},
{
"epoch": 6.85,
"learning_rate": 1.629765296431963e-05,
"loss": 0.0193,
"step": 454500
},
{
"epoch": 6.86,
"learning_rate": 1.6282578875171465e-05,
"loss": 0.0192,
"step": 455000
},
{
"epoch": 6.87,
"learning_rate": 1.6267504786023306e-05,
"loss": 0.0197,
"step": 455500
},
{
"epoch": 6.87,
"learning_rate": 1.625243069687514e-05,
"loss": 0.0199,
"step": 456000
},
{
"epoch": 6.88,
"learning_rate": 1.6237356607726977e-05,
"loss": 0.0195,
"step": 456500
},
{
"epoch": 6.89,
"learning_rate": 1.6222282518578815e-05,
"loss": 0.0196,
"step": 457000
},
{
"epoch": 6.9,
"learning_rate": 1.6207208429430652e-05,
"loss": 0.0194,
"step": 457500
},
{
"epoch": 6.9,
"learning_rate": 1.6192134340282486e-05,
"loss": 0.0189,
"step": 458000
},
{
"epoch": 6.91,
"learning_rate": 1.6177060251134327e-05,
"loss": 0.0208,
"step": 458500
},
{
"epoch": 6.92,
"learning_rate": 1.616198616198616e-05,
"loss": 0.0188,
"step": 459000
},
{
"epoch": 6.93,
"learning_rate": 1.6146912072838e-05,
"loss": 0.0187,
"step": 459500
},
{
"epoch": 6.93,
"learning_rate": 1.6131837983689836e-05,
"loss": 0.0176,
"step": 460000
},
{
"epoch": 6.94,
"learning_rate": 1.6116763894541673e-05,
"loss": 0.0185,
"step": 460500
},
{
"epoch": 6.95,
"learning_rate": 1.6101689805393507e-05,
"loss": 0.0197,
"step": 461000
},
{
"epoch": 6.96,
"learning_rate": 1.6086615716245345e-05,
"loss": 0.0193,
"step": 461500
},
{
"epoch": 6.96,
"learning_rate": 1.6071541627097182e-05,
"loss": 0.0192,
"step": 462000
},
{
"epoch": 6.97,
"learning_rate": 1.605646753794902e-05,
"loss": 0.0182,
"step": 462500
},
{
"epoch": 6.98,
"learning_rate": 1.6041393448800854e-05,
"loss": 0.0199,
"step": 463000
},
{
"epoch": 6.99,
"learning_rate": 1.6026319359652695e-05,
"loss": 0.0194,
"step": 463500
},
{
"epoch": 6.99,
"learning_rate": 1.601124527050453e-05,
"loss": 0.0187,
"step": 464000
},
{
"epoch": 7.0,
"eval_accuracy": 0.9867848831812434,
"eval_f1": 0.9414967774830111,
"eval_loss": 0.04958844557404518,
"eval_precision": 0.9220167739418763,
"eval_recall": 0.9618176787036811,
"eval_runtime": 215.2851,
"eval_samples_per_second": 547.757,
"eval_steps_per_second": 34.238,
"step": 464373
},
{
"epoch": 7.0,
"learning_rate": 1.5996171181356366e-05,
"loss": 0.0178,
"step": 464500
},
{
"epoch": 7.01,
"learning_rate": 1.5981097092208203e-05,
"loss": 0.0159,
"step": 465000
},
{
"epoch": 7.02,
"learning_rate": 1.596602300306004e-05,
"loss": 0.0156,
"step": 465500
},
{
"epoch": 7.02,
"learning_rate": 1.5950948913911875e-05,
"loss": 0.0141,
"step": 466000
},
{
"epoch": 7.03,
"learning_rate": 1.5935874824763716e-05,
"loss": 0.0136,
"step": 466500
},
{
"epoch": 7.04,
"learning_rate": 1.592080073561555e-05,
"loss": 0.0139,
"step": 467000
},
{
"epoch": 7.05,
"learning_rate": 1.5905726646467387e-05,
"loss": 0.0144,
"step": 467500
},
{
"epoch": 7.05,
"learning_rate": 1.5890652557319225e-05,
"loss": 0.0142,
"step": 468000
},
{
"epoch": 7.06,
"learning_rate": 1.5875578468171062e-05,
"loss": 0.0146,
"step": 468500
},
{
"epoch": 7.07,
"learning_rate": 1.5860504379022896e-05,
"loss": 0.0157,
"step": 469000
},
{
"epoch": 7.08,
"learning_rate": 1.5845430289874733e-05,
"loss": 0.0144,
"step": 469500
},
{
"epoch": 7.08,
"learning_rate": 1.583035620072657e-05,
"loss": 0.0153,
"step": 470000
},
{
"epoch": 7.09,
"learning_rate": 1.581528211157841e-05,
"loss": 0.0141,
"step": 470500
},
{
"epoch": 7.1,
"learning_rate": 1.5800208022430242e-05,
"loss": 0.014,
"step": 471000
},
{
"epoch": 7.11,
"learning_rate": 1.5785133933282083e-05,
"loss": 0.0149,
"step": 471500
},
{
"epoch": 7.11,
"learning_rate": 1.5770059844133917e-05,
"loss": 0.015,
"step": 472000
},
{
"epoch": 7.12,
"learning_rate": 1.5754985754985755e-05,
"loss": 0.0137,
"step": 472500
},
{
"epoch": 7.13,
"learning_rate": 1.5739911665837592e-05,
"loss": 0.0157,
"step": 473000
},
{
"epoch": 7.14,
"learning_rate": 1.572483757668943e-05,
"loss": 0.0153,
"step": 473500
},
{
"epoch": 7.15,
"learning_rate": 1.5709763487541264e-05,
"loss": 0.0148,
"step": 474000
},
{
"epoch": 7.15,
"learning_rate": 1.5694689398393104e-05,
"loss": 0.0153,
"step": 474500
},
{
"epoch": 7.16,
"learning_rate": 1.567961530924494e-05,
"loss": 0.0147,
"step": 475000
},
{
"epoch": 7.17,
"learning_rate": 1.5664541220096776e-05,
"loss": 0.0156,
"step": 475500
},
{
"epoch": 7.18,
"learning_rate": 1.564946713094861e-05,
"loss": 0.0148,
"step": 476000
},
{
"epoch": 7.18,
"learning_rate": 1.563439304180045e-05,
"loss": 0.0162,
"step": 476500
},
{
"epoch": 7.19,
"learning_rate": 1.5619318952652285e-05,
"loss": 0.0155,
"step": 477000
},
{
"epoch": 7.2,
"learning_rate": 1.5604244863504122e-05,
"loss": 0.015,
"step": 477500
},
{
"epoch": 7.21,
"learning_rate": 1.558917077435596e-05,
"loss": 0.0149,
"step": 478000
},
{
"epoch": 7.21,
"learning_rate": 1.5574096685207797e-05,
"loss": 0.0148,
"step": 478500
},
{
"epoch": 7.22,
"learning_rate": 1.555902259605963e-05,
"loss": 0.0153,
"step": 479000
},
{
"epoch": 7.23,
"learning_rate": 1.5543948506911472e-05,
"loss": 0.0163,
"step": 479500
},
{
"epoch": 7.24,
"learning_rate": 1.5528874417763306e-05,
"loss": 0.0168,
"step": 480000
},
{
"epoch": 7.24,
"learning_rate": 1.5513800328615143e-05,
"loss": 0.0151,
"step": 480500
},
{
"epoch": 7.25,
"learning_rate": 1.549872623946698e-05,
"loss": 0.0148,
"step": 481000
},
{
"epoch": 7.26,
"learning_rate": 1.5483652150318818e-05,
"loss": 0.0152,
"step": 481500
},
{
"epoch": 7.27,
"learning_rate": 1.5468578061170652e-05,
"loss": 0.0165,
"step": 482000
},
{
"epoch": 7.27,
"learning_rate": 1.5453503972022493e-05,
"loss": 0.0162,
"step": 482500
},
{
"epoch": 7.28,
"learning_rate": 1.5438429882874327e-05,
"loss": 0.0152,
"step": 483000
},
{
"epoch": 7.29,
"learning_rate": 1.5423355793726164e-05,
"loss": 0.0174,
"step": 483500
},
{
"epoch": 7.3,
"learning_rate": 1.5408281704578e-05,
"loss": 0.0151,
"step": 484000
},
{
"epoch": 7.3,
"learning_rate": 1.539320761542984e-05,
"loss": 0.0147,
"step": 484500
},
{
"epoch": 7.31,
"learning_rate": 1.5378133526281673e-05,
"loss": 0.0156,
"step": 485000
},
{
"epoch": 7.32,
"learning_rate": 1.536305943713351e-05,
"loss": 0.0151,
"step": 485500
},
{
"epoch": 7.33,
"learning_rate": 1.5347985347985348e-05,
"loss": 0.0153,
"step": 486000
},
{
"epoch": 7.33,
"learning_rate": 1.5332911258837186e-05,
"loss": 0.0164,
"step": 486500
},
{
"epoch": 7.34,
"learning_rate": 1.531783716968902e-05,
"loss": 0.0159,
"step": 487000
},
{
"epoch": 7.35,
"learning_rate": 1.530276308054086e-05,
"loss": 0.0152,
"step": 487500
},
{
"epoch": 7.36,
"learning_rate": 1.5287688991392694e-05,
"loss": 0.0161,
"step": 488000
},
{
"epoch": 7.36,
"learning_rate": 1.5272614902244532e-05,
"loss": 0.0157,
"step": 488500
},
{
"epoch": 7.37,
"learning_rate": 1.5257540813096368e-05,
"loss": 0.0154,
"step": 489000
},
{
"epoch": 7.38,
"learning_rate": 1.5242466723948207e-05,
"loss": 0.0155,
"step": 489500
},
{
"epoch": 7.39,
"learning_rate": 1.522739263480004e-05,
"loss": 0.0155,
"step": 490000
},
{
"epoch": 7.39,
"learning_rate": 1.521231854565188e-05,
"loss": 0.0164,
"step": 490500
},
{
"epoch": 7.4,
"learning_rate": 1.5197244456503716e-05,
"loss": 0.0158,
"step": 491000
},
{
"epoch": 7.41,
"learning_rate": 1.5182170367355553e-05,
"loss": 0.0144,
"step": 491500
},
{
"epoch": 7.42,
"learning_rate": 1.5167096278207389e-05,
"loss": 0.0153,
"step": 492000
},
{
"epoch": 7.42,
"learning_rate": 1.5152022189059228e-05,
"loss": 0.0159,
"step": 492500
},
{
"epoch": 7.43,
"learning_rate": 1.5136948099911062e-05,
"loss": 0.0158,
"step": 493000
},
{
"epoch": 7.44,
"learning_rate": 1.5121874010762901e-05,
"loss": 0.0163,
"step": 493500
},
{
"epoch": 7.45,
"learning_rate": 1.5106799921614735e-05,
"loss": 0.0154,
"step": 494000
},
{
"epoch": 7.45,
"learning_rate": 1.5091725832466574e-05,
"loss": 0.0155,
"step": 494500
},
{
"epoch": 7.46,
"learning_rate": 1.507665174331841e-05,
"loss": 0.0152,
"step": 495000
},
{
"epoch": 7.47,
"learning_rate": 1.5061577654170247e-05,
"loss": 0.0143,
"step": 495500
},
{
"epoch": 7.48,
"learning_rate": 1.5046503565022083e-05,
"loss": 0.0145,
"step": 496000
},
{
"epoch": 7.48,
"learning_rate": 1.5031429475873922e-05,
"loss": 0.0161,
"step": 496500
},
{
"epoch": 7.49,
"learning_rate": 1.5016355386725756e-05,
"loss": 0.0156,
"step": 497000
},
{
"epoch": 7.5,
"learning_rate": 1.5001281297577595e-05,
"loss": 0.0163,
"step": 497500
},
{
"epoch": 7.51,
"learning_rate": 1.4986207208429431e-05,
"loss": 0.0159,
"step": 498000
},
{
"epoch": 7.51,
"learning_rate": 1.4971133119281268e-05,
"loss": 0.0149,
"step": 498500
},
{
"epoch": 7.52,
"learning_rate": 1.4956059030133106e-05,
"loss": 0.0166,
"step": 499000
},
{
"epoch": 7.53,
"learning_rate": 1.4940984940984942e-05,
"loss": 0.0154,
"step": 499500
},
{
"epoch": 7.54,
"learning_rate": 1.4925910851836779e-05,
"loss": 0.0147,
"step": 500000
},
{
"epoch": 7.54,
"learning_rate": 1.4910836762688615e-05,
"loss": 0.0151,
"step": 500500
},
{
"epoch": 7.55,
"learning_rate": 1.4895762673540452e-05,
"loss": 0.0141,
"step": 501000
},
{
"epoch": 7.56,
"learning_rate": 1.488068858439229e-05,
"loss": 0.0149,
"step": 501500
},
{
"epoch": 7.57,
"learning_rate": 1.4865614495244125e-05,
"loss": 0.0165,
"step": 502000
},
{
"epoch": 7.57,
"learning_rate": 1.4850540406095963e-05,
"loss": 0.0165,
"step": 502500
},
{
"epoch": 7.58,
"learning_rate": 1.48354663169478e-05,
"loss": 0.0169,
"step": 503000
},
{
"epoch": 7.59,
"learning_rate": 1.4820392227799636e-05,
"loss": 0.0163,
"step": 503500
},
{
"epoch": 7.6,
"learning_rate": 1.4805318138651473e-05,
"loss": 0.0155,
"step": 504000
},
{
"epoch": 7.6,
"learning_rate": 1.4790244049503309e-05,
"loss": 0.0166,
"step": 504500
},
{
"epoch": 7.61,
"learning_rate": 1.4775169960355146e-05,
"loss": 0.015,
"step": 505000
},
{
"epoch": 7.62,
"learning_rate": 1.4760095871206984e-05,
"loss": 0.0153,
"step": 505500
},
{
"epoch": 7.63,
"learning_rate": 1.474502178205882e-05,
"loss": 0.016,
"step": 506000
},
{
"epoch": 7.64,
"learning_rate": 1.4729947692910657e-05,
"loss": 0.0167,
"step": 506500
},
{
"epoch": 7.64,
"learning_rate": 1.4714873603762494e-05,
"loss": 0.0165,
"step": 507000
},
{
"epoch": 7.65,
"learning_rate": 1.469979951461433e-05,
"loss": 0.0161,
"step": 507500
},
{
"epoch": 7.66,
"learning_rate": 1.4684725425466168e-05,
"loss": 0.0162,
"step": 508000
},
{
"epoch": 7.67,
"learning_rate": 1.4669651336318003e-05,
"loss": 0.0158,
"step": 508500
},
{
"epoch": 7.67,
"learning_rate": 1.465457724716984e-05,
"loss": 0.0147,
"step": 509000
},
{
"epoch": 7.68,
"learning_rate": 1.4639503158021678e-05,
"loss": 0.0169,
"step": 509500
},
{
"epoch": 7.69,
"learning_rate": 1.4624429068873514e-05,
"loss": 0.0159,
"step": 510000
},
{
"epoch": 7.7,
"learning_rate": 1.4609354979725351e-05,
"loss": 0.0156,
"step": 510500
},
{
"epoch": 7.7,
"learning_rate": 1.4594280890577189e-05,
"loss": 0.0156,
"step": 511000
},
{
"epoch": 7.71,
"learning_rate": 1.4579206801429024e-05,
"loss": 0.0167,
"step": 511500
},
{
"epoch": 7.72,
"learning_rate": 1.4564132712280862e-05,
"loss": 0.0161,
"step": 512000
},
{
"epoch": 7.73,
"learning_rate": 1.4549058623132698e-05,
"loss": 0.0164,
"step": 512500
},
{
"epoch": 7.73,
"learning_rate": 1.4533984533984535e-05,
"loss": 0.015,
"step": 513000
},
{
"epoch": 7.74,
"learning_rate": 1.4518910444836372e-05,
"loss": 0.0157,
"step": 513500
},
{
"epoch": 7.75,
"learning_rate": 1.4503836355688208e-05,
"loss": 0.0163,
"step": 514000
},
{
"epoch": 7.76,
"learning_rate": 1.4488762266540046e-05,
"loss": 0.0158,
"step": 514500
},
{
"epoch": 7.76,
"learning_rate": 1.4473688177391883e-05,
"loss": 0.0164,
"step": 515000
},
{
"epoch": 7.77,
"learning_rate": 1.4458614088243719e-05,
"loss": 0.0164,
"step": 515500
},
{
"epoch": 7.78,
"learning_rate": 1.4443539999095556e-05,
"loss": 0.0155,
"step": 516000
},
{
"epoch": 7.79,
"learning_rate": 1.4428465909947392e-05,
"loss": 0.0168,
"step": 516500
},
{
"epoch": 7.79,
"learning_rate": 1.441339182079923e-05,
"loss": 0.0163,
"step": 517000
},
{
"epoch": 7.8,
"learning_rate": 1.4398317731651067e-05,
"loss": 0.0154,
"step": 517500
},
{
"epoch": 7.81,
"learning_rate": 1.4383243642502902e-05,
"loss": 0.0159,
"step": 518000
},
{
"epoch": 7.82,
"learning_rate": 1.436816955335474e-05,
"loss": 0.0175,
"step": 518500
},
{
"epoch": 7.82,
"learning_rate": 1.4353095464206577e-05,
"loss": 0.0164,
"step": 519000
},
{
"epoch": 7.83,
"learning_rate": 1.4338021375058413e-05,
"loss": 0.0155,
"step": 519500
},
{
"epoch": 7.84,
"learning_rate": 1.432294728591025e-05,
"loss": 0.0167,
"step": 520000
},
{
"epoch": 7.85,
"learning_rate": 1.4307873196762086e-05,
"loss": 0.0157,
"step": 520500
},
{
"epoch": 7.85,
"learning_rate": 1.4292799107613924e-05,
"loss": 0.0165,
"step": 521000
},
{
"epoch": 7.86,
"learning_rate": 1.4277725018465761e-05,
"loss": 0.016,
"step": 521500
},
{
"epoch": 7.87,
"learning_rate": 1.4262650929317597e-05,
"loss": 0.0161,
"step": 522000
},
{
"epoch": 7.88,
"learning_rate": 1.4247576840169432e-05,
"loss": 0.0155,
"step": 522500
},
{
"epoch": 7.88,
"learning_rate": 1.423250275102127e-05,
"loss": 0.015,
"step": 523000
},
{
"epoch": 7.89,
"learning_rate": 1.4217428661873106e-05,
"loss": 0.0157,
"step": 523500
},
{
"epoch": 7.9,
"learning_rate": 1.4202354572724943e-05,
"loss": 0.016,
"step": 524000
},
{
"epoch": 7.91,
"learning_rate": 1.4187280483576779e-05,
"loss": 0.0159,
"step": 524500
},
{
"epoch": 7.91,
"learning_rate": 1.4172206394428616e-05,
"loss": 0.0169,
"step": 525000
},
{
"epoch": 7.92,
"learning_rate": 1.4157132305280454e-05,
"loss": 0.0157,
"step": 525500
},
{
"epoch": 7.93,
"learning_rate": 1.414205821613229e-05,
"loss": 0.0161,
"step": 526000
},
{
"epoch": 7.94,
"learning_rate": 1.4126984126984127e-05,
"loss": 0.0155,
"step": 526500
},
{
"epoch": 7.94,
"learning_rate": 1.4111910037835964e-05,
"loss": 0.0165,
"step": 527000
},
{
"epoch": 7.95,
"learning_rate": 1.40968359486878e-05,
"loss": 0.0148,
"step": 527500
},
{
"epoch": 7.96,
"learning_rate": 1.4081761859539637e-05,
"loss": 0.0155,
"step": 528000
},
{
"epoch": 7.97,
"learning_rate": 1.4066687770391473e-05,
"loss": 0.0148,
"step": 528500
},
{
"epoch": 7.97,
"learning_rate": 1.405161368124331e-05,
"loss": 0.0161,
"step": 529000
},
{
"epoch": 7.98,
"learning_rate": 1.4036539592095148e-05,
"loss": 0.0161,
"step": 529500
},
{
"epoch": 7.99,
"learning_rate": 1.4021465502946984e-05,
"loss": 0.0164,
"step": 530000
},
{
"epoch": 8.0,
"learning_rate": 1.4006391413798821e-05,
"loss": 0.0168,
"step": 530500
},
{
"epoch": 8.0,
"eval_accuracy": 0.9867512582951073,
"eval_f1": 0.9419972937708403,
"eval_loss": 0.05773118510842323,
"eval_precision": 0.9323328948941767,
"eval_recall": 0.951864150206314,
"eval_runtime": 228.8409,
"eval_samples_per_second": 515.31,
"eval_steps_per_second": 32.21,
"step": 530712
},
{
"epoch": 8.0,
"learning_rate": 1.3991317324650657e-05,
"loss": 0.0136,
"step": 531000
},
{
"epoch": 8.01,
"learning_rate": 1.3976243235502494e-05,
"loss": 0.0122,
"step": 531500
},
{
"epoch": 8.02,
"learning_rate": 1.3961169146354332e-05,
"loss": 0.0137,
"step": 532000
},
{
"epoch": 8.03,
"learning_rate": 1.3946095057206167e-05,
"loss": 0.0119,
"step": 532500
},
{
"epoch": 8.03,
"learning_rate": 1.3931020968058005e-05,
"loss": 0.0123,
"step": 533000
},
{
"epoch": 8.04,
"learning_rate": 1.3915946878909842e-05,
"loss": 0.0138,
"step": 533500
},
{
"epoch": 8.05,
"learning_rate": 1.3900872789761678e-05,
"loss": 0.0129,
"step": 534000
},
{
"epoch": 8.06,
"learning_rate": 1.3885798700613515e-05,
"loss": 0.0117,
"step": 534500
},
{
"epoch": 8.06,
"learning_rate": 1.3870724611465351e-05,
"loss": 0.0117,
"step": 535000
},
{
"epoch": 8.07,
"learning_rate": 1.3855650522317188e-05,
"loss": 0.0124,
"step": 535500
},
{
"epoch": 8.08,
"learning_rate": 1.3840576433169026e-05,
"loss": 0.0123,
"step": 536000
},
{
"epoch": 8.09,
"learning_rate": 1.3825502344020862e-05,
"loss": 0.0125,
"step": 536500
},
{
"epoch": 8.09,
"learning_rate": 1.3810428254872699e-05,
"loss": 0.0127,
"step": 537000
},
{
"epoch": 8.1,
"learning_rate": 1.3795354165724536e-05,
"loss": 0.0128,
"step": 537500
},
{
"epoch": 8.11,
"learning_rate": 1.3780280076576372e-05,
"loss": 0.0125,
"step": 538000
},
{
"epoch": 8.12,
"learning_rate": 1.376520598742821e-05,
"loss": 0.0121,
"step": 538500
},
{
"epoch": 8.12,
"learning_rate": 1.3750131898280045e-05,
"loss": 0.0133,
"step": 539000
},
{
"epoch": 8.13,
"learning_rate": 1.3735057809131883e-05,
"loss": 0.0122,
"step": 539500
},
{
"epoch": 8.14,
"learning_rate": 1.371998371998372e-05,
"loss": 0.0129,
"step": 540000
},
{
"epoch": 8.15,
"learning_rate": 1.3704909630835556e-05,
"loss": 0.0129,
"step": 540500
},
{
"epoch": 8.16,
"learning_rate": 1.3689835541687393e-05,
"loss": 0.0143,
"step": 541000
},
{
"epoch": 8.16,
"learning_rate": 1.367476145253923e-05,
"loss": 0.0134,
"step": 541500
},
{
"epoch": 8.17,
"learning_rate": 1.3659687363391066e-05,
"loss": 0.0126,
"step": 542000
},
{
"epoch": 8.18,
"learning_rate": 1.3644613274242904e-05,
"loss": 0.0123,
"step": 542500
},
{
"epoch": 8.19,
"learning_rate": 1.362953918509474e-05,
"loss": 0.0133,
"step": 543000
},
{
"epoch": 8.19,
"learning_rate": 1.3614465095946577e-05,
"loss": 0.013,
"step": 543500
},
{
"epoch": 8.2,
"learning_rate": 1.3599391006798414e-05,
"loss": 0.0127,
"step": 544000
},
{
"epoch": 8.21,
"learning_rate": 1.358431691765025e-05,
"loss": 0.0124,
"step": 544500
},
{
"epoch": 8.22,
"learning_rate": 1.3569242828502088e-05,
"loss": 0.0131,
"step": 545000
},
{
"epoch": 8.22,
"learning_rate": 1.3554168739353925e-05,
"loss": 0.0129,
"step": 545500
},
{
"epoch": 8.23,
"learning_rate": 1.353909465020576e-05,
"loss": 0.0137,
"step": 546000
},
{
"epoch": 8.24,
"learning_rate": 1.3524020561057598e-05,
"loss": 0.0119,
"step": 546500
},
{
"epoch": 8.25,
"learning_rate": 1.3508946471909434e-05,
"loss": 0.0133,
"step": 547000
},
{
"epoch": 8.25,
"learning_rate": 1.3493872382761271e-05,
"loss": 0.0134,
"step": 547500
},
{
"epoch": 8.26,
"learning_rate": 1.3478798293613109e-05,
"loss": 0.0128,
"step": 548000
},
{
"epoch": 8.27,
"learning_rate": 1.3463724204464945e-05,
"loss": 0.0141,
"step": 548500
},
{
"epoch": 8.28,
"learning_rate": 1.3448650115316782e-05,
"loss": 0.0132,
"step": 549000
},
{
"epoch": 8.28,
"learning_rate": 1.343357602616862e-05,
"loss": 0.0144,
"step": 549500
},
{
"epoch": 8.29,
"learning_rate": 1.3418501937020455e-05,
"loss": 0.0136,
"step": 550000
},
{
"epoch": 8.3,
"learning_rate": 1.3403427847872293e-05,
"loss": 0.0129,
"step": 550500
},
{
"epoch": 8.31,
"learning_rate": 1.3388353758724128e-05,
"loss": 0.0134,
"step": 551000
},
{
"epoch": 8.31,
"learning_rate": 1.3373279669575966e-05,
"loss": 0.0131,
"step": 551500
},
{
"epoch": 8.32,
"learning_rate": 1.3358205580427803e-05,
"loss": 0.0124,
"step": 552000
},
{
"epoch": 8.33,
"learning_rate": 1.3343131491279639e-05,
"loss": 0.0127,
"step": 552500
},
{
"epoch": 8.34,
"learning_rate": 1.3328057402131476e-05,
"loss": 0.0128,
"step": 553000
},
{
"epoch": 8.34,
"learning_rate": 1.3312983312983314e-05,
"loss": 0.0142,
"step": 553500
},
{
"epoch": 8.35,
"learning_rate": 1.329790922383515e-05,
"loss": 0.0127,
"step": 554000
},
{
"epoch": 8.36,
"learning_rate": 1.3282835134686987e-05,
"loss": 0.0127,
"step": 554500
},
{
"epoch": 8.37,
"learning_rate": 1.3267761045538823e-05,
"loss": 0.0135,
"step": 555000
},
{
"epoch": 8.37,
"learning_rate": 1.325268695639066e-05,
"loss": 0.0142,
"step": 555500
},
{
"epoch": 8.38,
"learning_rate": 1.3237612867242497e-05,
"loss": 0.0126,
"step": 556000
},
{
"epoch": 8.39,
"learning_rate": 1.3222538778094333e-05,
"loss": 0.013,
"step": 556500
},
{
"epoch": 8.4,
"learning_rate": 1.320746468894617e-05,
"loss": 0.0127,
"step": 557000
},
{
"epoch": 8.4,
"learning_rate": 1.3192390599798008e-05,
"loss": 0.0136,
"step": 557500
},
{
"epoch": 8.41,
"learning_rate": 1.3177316510649844e-05,
"loss": 0.0132,
"step": 558000
},
{
"epoch": 8.42,
"learning_rate": 1.3162242421501681e-05,
"loss": 0.0141,
"step": 558500
},
{
"epoch": 8.43,
"learning_rate": 1.3147168332353517e-05,
"loss": 0.0121,
"step": 559000
},
{
"epoch": 8.43,
"learning_rate": 1.3132094243205354e-05,
"loss": 0.0129,
"step": 559500
},
{
"epoch": 8.44,
"learning_rate": 1.3117020154057192e-05,
"loss": 0.0138,
"step": 560000
},
{
"epoch": 8.45,
"learning_rate": 1.3101946064909027e-05,
"loss": 0.0132,
"step": 560500
},
{
"epoch": 8.46,
"learning_rate": 1.3086871975760865e-05,
"loss": 0.0131,
"step": 561000
},
{
"epoch": 8.46,
"learning_rate": 1.30717978866127e-05,
"loss": 0.0133,
"step": 561500
},
{
"epoch": 8.47,
"learning_rate": 1.3056723797464538e-05,
"loss": 0.013,
"step": 562000
},
{
"epoch": 8.48,
"learning_rate": 1.3041649708316375e-05,
"loss": 0.0152,
"step": 562500
},
{
"epoch": 8.49,
"learning_rate": 1.3026575619168211e-05,
"loss": 0.0142,
"step": 563000
},
{
"epoch": 8.49,
"learning_rate": 1.3011501530020049e-05,
"loss": 0.0132,
"step": 563500
},
{
"epoch": 8.5,
"learning_rate": 1.2996427440871886e-05,
"loss": 0.0138,
"step": 564000
},
{
"epoch": 8.51,
"learning_rate": 1.2981353351723722e-05,
"loss": 0.0127,
"step": 564500
},
{
"epoch": 8.52,
"learning_rate": 1.2966279262575559e-05,
"loss": 0.0135,
"step": 565000
},
{
"epoch": 8.52,
"learning_rate": 1.2951205173427395e-05,
"loss": 0.014,
"step": 565500
},
{
"epoch": 8.53,
"learning_rate": 1.2936131084279232e-05,
"loss": 0.014,
"step": 566000
},
{
"epoch": 8.54,
"learning_rate": 1.292105699513107e-05,
"loss": 0.0131,
"step": 566500
},
{
"epoch": 8.55,
"learning_rate": 1.2905982905982905e-05,
"loss": 0.0137,
"step": 567000
},
{
"epoch": 8.55,
"learning_rate": 1.2890908816834743e-05,
"loss": 0.0141,
"step": 567500
},
{
"epoch": 8.56,
"learning_rate": 1.287583472768658e-05,
"loss": 0.0142,
"step": 568000
},
{
"epoch": 8.57,
"learning_rate": 1.2860760638538416e-05,
"loss": 0.0131,
"step": 568500
},
{
"epoch": 8.58,
"learning_rate": 1.2845686549390253e-05,
"loss": 0.0129,
"step": 569000
},
{
"epoch": 8.58,
"learning_rate": 1.2830612460242089e-05,
"loss": 0.0139,
"step": 569500
},
{
"epoch": 8.59,
"learning_rate": 1.2815538371093927e-05,
"loss": 0.0134,
"step": 570000
},
{
"epoch": 8.6,
"learning_rate": 1.2800464281945764e-05,
"loss": 0.0136,
"step": 570500
},
{
"epoch": 8.61,
"learning_rate": 1.27853901927976e-05,
"loss": 0.0139,
"step": 571000
},
{
"epoch": 8.61,
"learning_rate": 1.2770316103649437e-05,
"loss": 0.0143,
"step": 571500
},
{
"epoch": 8.62,
"learning_rate": 1.2755242014501275e-05,
"loss": 0.0138,
"step": 572000
},
{
"epoch": 8.63,
"learning_rate": 1.274016792535311e-05,
"loss": 0.0138,
"step": 572500
},
{
"epoch": 8.64,
"learning_rate": 1.2725093836204948e-05,
"loss": 0.0135,
"step": 573000
},
{
"epoch": 8.64,
"learning_rate": 1.2710019747056783e-05,
"loss": 0.0138,
"step": 573500
},
{
"epoch": 8.65,
"learning_rate": 1.269494565790862e-05,
"loss": 0.0139,
"step": 574000
},
{
"epoch": 8.66,
"learning_rate": 1.2679871568760458e-05,
"loss": 0.0132,
"step": 574500
},
{
"epoch": 8.67,
"learning_rate": 1.2664797479612294e-05,
"loss": 0.0139,
"step": 575000
},
{
"epoch": 8.68,
"learning_rate": 1.2649723390464131e-05,
"loss": 0.0123,
"step": 575500
},
{
"epoch": 8.68,
"learning_rate": 1.2634649301315969e-05,
"loss": 0.0135,
"step": 576000
},
{
"epoch": 8.69,
"learning_rate": 1.2619575212167805e-05,
"loss": 0.015,
"step": 576500
},
{
"epoch": 8.7,
"learning_rate": 1.2604501123019642e-05,
"loss": 0.014,
"step": 577000
},
{
"epoch": 8.71,
"learning_rate": 1.2589427033871478e-05,
"loss": 0.0134,
"step": 577500
},
{
"epoch": 8.71,
"learning_rate": 1.2574352944723315e-05,
"loss": 0.0132,
"step": 578000
},
{
"epoch": 8.72,
"learning_rate": 1.2559278855575153e-05,
"loss": 0.0123,
"step": 578500
},
{
"epoch": 8.73,
"learning_rate": 1.2544204766426988e-05,
"loss": 0.0128,
"step": 579000
},
{
"epoch": 8.74,
"learning_rate": 1.2529130677278826e-05,
"loss": 0.0127,
"step": 579500
},
{
"epoch": 8.74,
"learning_rate": 1.2514056588130663e-05,
"loss": 0.013,
"step": 580000
},
{
"epoch": 8.75,
"learning_rate": 1.2498982498982499e-05,
"loss": 0.0127,
"step": 580500
},
{
"epoch": 8.76,
"learning_rate": 1.2483908409834336e-05,
"loss": 0.0143,
"step": 581000
},
{
"epoch": 8.77,
"learning_rate": 1.2468834320686172e-05,
"loss": 0.0128,
"step": 581500
},
{
"epoch": 8.77,
"learning_rate": 1.245376023153801e-05,
"loss": 0.0141,
"step": 582000
},
{
"epoch": 8.78,
"learning_rate": 1.2438686142389847e-05,
"loss": 0.0142,
"step": 582500
},
{
"epoch": 8.79,
"learning_rate": 1.2423612053241683e-05,
"loss": 0.0126,
"step": 583000
},
{
"epoch": 8.8,
"learning_rate": 1.240853796409352e-05,
"loss": 0.014,
"step": 583500
},
{
"epoch": 8.8,
"learning_rate": 1.2393463874945357e-05,
"loss": 0.0127,
"step": 584000
},
{
"epoch": 8.81,
"learning_rate": 1.2378389785797193e-05,
"loss": 0.0138,
"step": 584500
},
{
"epoch": 8.82,
"learning_rate": 1.236331569664903e-05,
"loss": 0.0135,
"step": 585000
},
{
"epoch": 8.83,
"learning_rate": 1.2348241607500866e-05,
"loss": 0.0138,
"step": 585500
},
{
"epoch": 8.83,
"learning_rate": 1.2333167518352704e-05,
"loss": 0.0139,
"step": 586000
},
{
"epoch": 8.84,
"learning_rate": 1.2318093429204541e-05,
"loss": 0.0141,
"step": 586500
},
{
"epoch": 8.85,
"learning_rate": 1.2303019340056377e-05,
"loss": 0.014,
"step": 587000
},
{
"epoch": 8.86,
"learning_rate": 1.2287945250908214e-05,
"loss": 0.0149,
"step": 587500
},
{
"epoch": 8.86,
"learning_rate": 1.227287116176005e-05,
"loss": 0.0129,
"step": 588000
},
{
"epoch": 8.87,
"learning_rate": 1.2257797072611887e-05,
"loss": 0.0141,
"step": 588500
},
{
"epoch": 8.88,
"learning_rate": 1.2242722983463725e-05,
"loss": 0.0136,
"step": 589000
},
{
"epoch": 8.89,
"learning_rate": 1.222764889431556e-05,
"loss": 0.0125,
"step": 589500
},
{
"epoch": 8.89,
"learning_rate": 1.2212574805167398e-05,
"loss": 0.0138,
"step": 590000
},
{
"epoch": 8.9,
"learning_rate": 1.2197500716019235e-05,
"loss": 0.013,
"step": 590500
},
{
"epoch": 8.91,
"learning_rate": 1.2182426626871071e-05,
"loss": 0.0142,
"step": 591000
},
{
"epoch": 8.92,
"learning_rate": 1.2167352537722909e-05,
"loss": 0.0136,
"step": 591500
},
{
"epoch": 8.92,
"learning_rate": 1.2152278448574744e-05,
"loss": 0.0132,
"step": 592000
},
{
"epoch": 8.93,
"learning_rate": 1.2137204359426582e-05,
"loss": 0.0142,
"step": 592500
},
{
"epoch": 8.94,
"learning_rate": 1.212213027027842e-05,
"loss": 0.0139,
"step": 593000
},
{
"epoch": 8.95,
"learning_rate": 1.2107056181130255e-05,
"loss": 0.0143,
"step": 593500
},
{
"epoch": 8.95,
"learning_rate": 1.2091982091982092e-05,
"loss": 0.0144,
"step": 594000
},
{
"epoch": 8.96,
"learning_rate": 1.207690800283393e-05,
"loss": 0.0136,
"step": 594500
},
{
"epoch": 8.97,
"learning_rate": 1.2061833913685765e-05,
"loss": 0.0138,
"step": 595000
},
{
"epoch": 8.98,
"learning_rate": 1.2046759824537603e-05,
"loss": 0.0122,
"step": 595500
},
{
"epoch": 8.98,
"learning_rate": 1.2031685735389439e-05,
"loss": 0.0135,
"step": 596000
},
{
"epoch": 8.99,
"learning_rate": 1.2016611646241276e-05,
"loss": 0.0135,
"step": 596500
},
{
"epoch": 9.0,
"learning_rate": 1.2001537557093113e-05,
"loss": 0.0138,
"step": 597000
},
{
"epoch": 9.0,
"eval_accuracy": 0.9875720274572235,
"eval_f1": 0.9454708265624094,
"eval_loss": 0.05194343999028206,
"eval_precision": 0.929096987338888,
"eval_recall": 0.9624321442814008,
"eval_runtime": 244.5713,
"eval_samples_per_second": 482.166,
"eval_steps_per_second": 30.138,
"step": 597051
},
{
"epoch": 9.01,
"learning_rate": 1.198646346794495e-05,
"loss": 0.0115,
"step": 597500
},
{
"epoch": 9.01,
"learning_rate": 1.1971389378796787e-05,
"loss": 0.0096,
"step": 598000
},
{
"epoch": 9.02,
"learning_rate": 1.1956315289648624e-05,
"loss": 0.0106,
"step": 598500
},
{
"epoch": 9.03,
"learning_rate": 1.194124120050046e-05,
"loss": 0.0114,
"step": 599000
},
{
"epoch": 9.04,
"learning_rate": 1.1926167111352297e-05,
"loss": 0.0111,
"step": 599500
},
{
"epoch": 9.04,
"learning_rate": 1.1911093022204133e-05,
"loss": 0.0105,
"step": 600000
},
{
"epoch": 9.05,
"learning_rate": 1.189601893305597e-05,
"loss": 0.0104,
"step": 600500
},
{
"epoch": 9.06,
"learning_rate": 1.1880944843907808e-05,
"loss": 0.0109,
"step": 601000
},
{
"epoch": 9.07,
"learning_rate": 1.1865870754759643e-05,
"loss": 0.0118,
"step": 601500
},
{
"epoch": 9.07,
"learning_rate": 1.1850796665611481e-05,
"loss": 0.0109,
"step": 602000
},
{
"epoch": 9.08,
"learning_rate": 1.1835722576463318e-05,
"loss": 0.0111,
"step": 602500
},
{
"epoch": 9.09,
"learning_rate": 1.1820648487315154e-05,
"loss": 0.0106,
"step": 603000
},
{
"epoch": 9.1,
"learning_rate": 1.1805574398166991e-05,
"loss": 0.0112,
"step": 603500
},
{
"epoch": 9.1,
"learning_rate": 1.1790500309018827e-05,
"loss": 0.0099,
"step": 604000
},
{
"epoch": 9.11,
"learning_rate": 1.1775426219870665e-05,
"loss": 0.0108,
"step": 604500
},
{
"epoch": 9.12,
"learning_rate": 1.1760352130722502e-05,
"loss": 0.0104,
"step": 605000
},
{
"epoch": 9.13,
"learning_rate": 1.1745278041574338e-05,
"loss": 0.0117,
"step": 605500
},
{
"epoch": 9.13,
"learning_rate": 1.1730203952426175e-05,
"loss": 0.0108,
"step": 606000
},
{
"epoch": 9.14,
"learning_rate": 1.1715129863278013e-05,
"loss": 0.0111,
"step": 606500
},
{
"epoch": 9.15,
"learning_rate": 1.1700055774129848e-05,
"loss": 0.0104,
"step": 607000
},
{
"epoch": 9.16,
"learning_rate": 1.1684981684981686e-05,
"loss": 0.0118,
"step": 607500
},
{
"epoch": 9.17,
"learning_rate": 1.1669907595833521e-05,
"loss": 0.0106,
"step": 608000
},
{
"epoch": 9.17,
"learning_rate": 1.1654833506685359e-05,
"loss": 0.0107,
"step": 608500
},
{
"epoch": 9.18,
"learning_rate": 1.1639759417537196e-05,
"loss": 0.0109,
"step": 609000
},
{
"epoch": 9.19,
"learning_rate": 1.1624685328389032e-05,
"loss": 0.0109,
"step": 609500
},
{
"epoch": 9.2,
"learning_rate": 1.160961123924087e-05,
"loss": 0.0105,
"step": 610000
},
{
"epoch": 9.2,
"learning_rate": 1.1594537150092707e-05,
"loss": 0.0111,
"step": 610500
},
{
"epoch": 9.21,
"learning_rate": 1.1579463060944543e-05,
"loss": 0.0113,
"step": 611000
},
{
"epoch": 9.22,
"learning_rate": 1.156438897179638e-05,
"loss": 0.0101,
"step": 611500
},
{
"epoch": 9.23,
"learning_rate": 1.1549314882648216e-05,
"loss": 0.0105,
"step": 612000
},
{
"epoch": 9.23,
"learning_rate": 1.1534240793500053e-05,
"loss": 0.0115,
"step": 612500
},
{
"epoch": 9.24,
"learning_rate": 1.151916670435189e-05,
"loss": 0.0113,
"step": 613000
},
{
"epoch": 9.25,
"learning_rate": 1.1504092615203726e-05,
"loss": 0.0111,
"step": 613500
},
{
"epoch": 9.26,
"learning_rate": 1.1489018526055564e-05,
"loss": 0.0115,
"step": 614000
},
{
"epoch": 9.26,
"learning_rate": 1.1473944436907401e-05,
"loss": 0.0109,
"step": 614500
},
{
"epoch": 9.27,
"learning_rate": 1.1458870347759237e-05,
"loss": 0.012,
"step": 615000
},
{
"epoch": 9.28,
"learning_rate": 1.1443796258611074e-05,
"loss": 0.0119,
"step": 615500
},
{
"epoch": 9.29,
"learning_rate": 1.142872216946291e-05,
"loss": 0.0101,
"step": 616000
},
{
"epoch": 9.29,
"learning_rate": 1.1413648080314747e-05,
"loss": 0.0111,
"step": 616500
},
{
"epoch": 9.3,
"learning_rate": 1.1398573991166585e-05,
"loss": 0.0121,
"step": 617000
},
{
"epoch": 9.31,
"learning_rate": 1.138349990201842e-05,
"loss": 0.011,
"step": 617500
},
{
"epoch": 9.32,
"learning_rate": 1.1368425812870258e-05,
"loss": 0.011,
"step": 618000
},
{
"epoch": 9.32,
"learning_rate": 1.1353351723722094e-05,
"loss": 0.0114,
"step": 618500
},
{
"epoch": 9.33,
"learning_rate": 1.1338277634573931e-05,
"loss": 0.0119,
"step": 619000
},
{
"epoch": 9.34,
"learning_rate": 1.1323203545425769e-05,
"loss": 0.0098,
"step": 619500
},
{
"epoch": 9.35,
"learning_rate": 1.1308129456277604e-05,
"loss": 0.0115,
"step": 620000
},
{
"epoch": 9.35,
"learning_rate": 1.1293055367129442e-05,
"loss": 0.0114,
"step": 620500
},
{
"epoch": 9.36,
"learning_rate": 1.127798127798128e-05,
"loss": 0.0124,
"step": 621000
},
{
"epoch": 9.37,
"learning_rate": 1.1262907188833115e-05,
"loss": 0.0118,
"step": 621500
},
{
"epoch": 9.38,
"learning_rate": 1.1247833099684952e-05,
"loss": 0.0109,
"step": 622000
},
{
"epoch": 9.38,
"learning_rate": 1.1232759010536788e-05,
"loss": 0.0122,
"step": 622500
},
{
"epoch": 9.39,
"learning_rate": 1.1217684921388626e-05,
"loss": 0.0124,
"step": 623000
},
{
"epoch": 9.4,
"learning_rate": 1.1202610832240463e-05,
"loss": 0.012,
"step": 623500
},
{
"epoch": 9.41,
"learning_rate": 1.1187536743092299e-05,
"loss": 0.011,
"step": 624000
},
{
"epoch": 9.41,
"learning_rate": 1.1172462653944136e-05,
"loss": 0.0107,
"step": 624500
},
{
"epoch": 9.42,
"learning_rate": 1.1157388564795974e-05,
"loss": 0.0114,
"step": 625000
},
{
"epoch": 9.43,
"learning_rate": 1.114231447564781e-05,
"loss": 0.0122,
"step": 625500
},
{
"epoch": 9.44,
"learning_rate": 1.1127240386499647e-05,
"loss": 0.0128,
"step": 626000
},
{
"epoch": 9.44,
"learning_rate": 1.1112166297351482e-05,
"loss": 0.0115,
"step": 626500
},
{
"epoch": 9.45,
"learning_rate": 1.109709220820332e-05,
"loss": 0.011,
"step": 627000
},
{
"epoch": 9.46,
"learning_rate": 1.1082018119055157e-05,
"loss": 0.0114,
"step": 627500
},
{
"epoch": 9.47,
"learning_rate": 1.1066944029906993e-05,
"loss": 0.0115,
"step": 628000
},
{
"epoch": 9.47,
"learning_rate": 1.105186994075883e-05,
"loss": 0.0114,
"step": 628500
},
{
"epoch": 9.48,
"learning_rate": 1.1036795851610668e-05,
"loss": 0.0117,
"step": 629000
},
{
"epoch": 9.49,
"learning_rate": 1.1021721762462504e-05,
"loss": 0.0124,
"step": 629500
},
{
"epoch": 9.5,
"learning_rate": 1.1006647673314341e-05,
"loss": 0.0124,
"step": 630000
},
{
"epoch": 9.5,
"learning_rate": 1.0991573584166177e-05,
"loss": 0.0123,
"step": 630500
},
{
"epoch": 9.51,
"learning_rate": 1.0976499495018014e-05,
"loss": 0.0111,
"step": 631000
},
{
"epoch": 9.52,
"learning_rate": 1.0961425405869852e-05,
"loss": 0.0115,
"step": 631500
},
{
"epoch": 9.53,
"learning_rate": 1.0946351316721687e-05,
"loss": 0.0113,
"step": 632000
},
{
"epoch": 9.53,
"learning_rate": 1.0931277227573525e-05,
"loss": 0.0123,
"step": 632500
},
{
"epoch": 9.54,
"learning_rate": 1.0916203138425362e-05,
"loss": 0.0122,
"step": 633000
},
{
"epoch": 9.55,
"learning_rate": 1.0901129049277198e-05,
"loss": 0.0114,
"step": 633500
},
{
"epoch": 9.56,
"learning_rate": 1.0886054960129035e-05,
"loss": 0.0114,
"step": 634000
},
{
"epoch": 9.56,
"learning_rate": 1.0870980870980871e-05,
"loss": 0.0114,
"step": 634500
},
{
"epoch": 9.57,
"learning_rate": 1.0855906781832708e-05,
"loss": 0.0119,
"step": 635000
},
{
"epoch": 9.58,
"learning_rate": 1.0840832692684546e-05,
"loss": 0.0105,
"step": 635500
},
{
"epoch": 9.59,
"learning_rate": 1.0825758603536382e-05,
"loss": 0.0121,
"step": 636000
},
{
"epoch": 9.59,
"learning_rate": 1.0810684514388219e-05,
"loss": 0.0107,
"step": 636500
},
{
"epoch": 9.6,
"learning_rate": 1.0795610425240056e-05,
"loss": 0.0116,
"step": 637000
},
{
"epoch": 9.61,
"learning_rate": 1.0780536336091892e-05,
"loss": 0.012,
"step": 637500
},
{
"epoch": 9.62,
"learning_rate": 1.076546224694373e-05,
"loss": 0.0111,
"step": 638000
},
{
"epoch": 9.62,
"learning_rate": 1.0750388157795565e-05,
"loss": 0.0123,
"step": 638500
},
{
"epoch": 9.63,
"learning_rate": 1.0735314068647403e-05,
"loss": 0.0123,
"step": 639000
},
{
"epoch": 9.64,
"learning_rate": 1.072023997949924e-05,
"loss": 0.0125,
"step": 639500
},
{
"epoch": 9.65,
"learning_rate": 1.0705165890351076e-05,
"loss": 0.0119,
"step": 640000
},
{
"epoch": 9.65,
"learning_rate": 1.0690091801202913e-05,
"loss": 0.0118,
"step": 640500
},
{
"epoch": 9.66,
"learning_rate": 1.067501771205475e-05,
"loss": 0.0113,
"step": 641000
},
{
"epoch": 9.67,
"learning_rate": 1.0659943622906586e-05,
"loss": 0.0111,
"step": 641500
},
{
"epoch": 9.68,
"learning_rate": 1.0644869533758424e-05,
"loss": 0.0114,
"step": 642000
},
{
"epoch": 9.69,
"learning_rate": 1.062979544461026e-05,
"loss": 0.0126,
"step": 642500
},
{
"epoch": 9.69,
"learning_rate": 1.0614721355462097e-05,
"loss": 0.0118,
"step": 643000
},
{
"epoch": 9.7,
"learning_rate": 1.0599647266313934e-05,
"loss": 0.0113,
"step": 643500
},
{
"epoch": 9.71,
"learning_rate": 1.058457317716577e-05,
"loss": 0.0121,
"step": 644000
},
{
"epoch": 9.72,
"learning_rate": 1.0569499088017608e-05,
"loss": 0.0123,
"step": 644500
},
{
"epoch": 9.72,
"learning_rate": 1.0554424998869443e-05,
"loss": 0.0119,
"step": 645000
},
{
"epoch": 9.73,
"learning_rate": 1.053935090972128e-05,
"loss": 0.0123,
"step": 645500
},
{
"epoch": 9.74,
"learning_rate": 1.0524276820573118e-05,
"loss": 0.0111,
"step": 646000
},
{
"epoch": 9.75,
"learning_rate": 1.0509202731424954e-05,
"loss": 0.0112,
"step": 646500
},
{
"epoch": 9.75,
"learning_rate": 1.0494128642276791e-05,
"loss": 0.0114,
"step": 647000
},
{
"epoch": 9.76,
"learning_rate": 1.0479054553128629e-05,
"loss": 0.0112,
"step": 647500
},
{
"epoch": 9.77,
"learning_rate": 1.0463980463980464e-05,
"loss": 0.0119,
"step": 648000
},
{
"epoch": 9.78,
"learning_rate": 1.0448906374832302e-05,
"loss": 0.0113,
"step": 648500
},
{
"epoch": 9.78,
"learning_rate": 1.0433832285684138e-05,
"loss": 0.0112,
"step": 649000
},
{
"epoch": 9.79,
"learning_rate": 1.0418758196535975e-05,
"loss": 0.0118,
"step": 649500
},
{
"epoch": 9.8,
"learning_rate": 1.0403684107387812e-05,
"loss": 0.0118,
"step": 650000
},
{
"epoch": 9.81,
"learning_rate": 1.0388610018239648e-05,
"loss": 0.0114,
"step": 650500
},
{
"epoch": 9.81,
"learning_rate": 1.0373535929091486e-05,
"loss": 0.0124,
"step": 651000
},
{
"epoch": 9.82,
"learning_rate": 1.0358461839943323e-05,
"loss": 0.0127,
"step": 651500
},
{
"epoch": 9.83,
"learning_rate": 1.0343387750795159e-05,
"loss": 0.0122,
"step": 652000
},
{
"epoch": 9.84,
"learning_rate": 1.0328313661646996e-05,
"loss": 0.0117,
"step": 652500
},
{
"epoch": 9.84,
"learning_rate": 1.0313239572498832e-05,
"loss": 0.013,
"step": 653000
},
{
"epoch": 9.85,
"learning_rate": 1.029816548335067e-05,
"loss": 0.0121,
"step": 653500
},
{
"epoch": 9.86,
"learning_rate": 1.0283091394202507e-05,
"loss": 0.0123,
"step": 654000
},
{
"epoch": 9.87,
"learning_rate": 1.0268017305054342e-05,
"loss": 0.0115,
"step": 654500
},
{
"epoch": 9.87,
"learning_rate": 1.025294321590618e-05,
"loss": 0.0122,
"step": 655000
},
{
"epoch": 9.88,
"learning_rate": 1.0237869126758017e-05,
"loss": 0.0112,
"step": 655500
},
{
"epoch": 9.89,
"learning_rate": 1.0222795037609853e-05,
"loss": 0.0119,
"step": 656000
},
{
"epoch": 9.9,
"learning_rate": 1.020772094846169e-05,
"loss": 0.0119,
"step": 656500
},
{
"epoch": 9.9,
"learning_rate": 1.0192646859313526e-05,
"loss": 0.0119,
"step": 657000
},
{
"epoch": 9.91,
"learning_rate": 1.0177572770165364e-05,
"loss": 0.0123,
"step": 657500
},
{
"epoch": 9.92,
"learning_rate": 1.0162498681017201e-05,
"loss": 0.0121,
"step": 658000
},
{
"epoch": 9.93,
"learning_rate": 1.0147424591869037e-05,
"loss": 0.0117,
"step": 658500
},
{
"epoch": 9.93,
"learning_rate": 1.0132350502720874e-05,
"loss": 0.0121,
"step": 659000
},
{
"epoch": 9.94,
"learning_rate": 1.0117276413572712e-05,
"loss": 0.0112,
"step": 659500
},
{
"epoch": 9.95,
"learning_rate": 1.0102202324424547e-05,
"loss": 0.0116,
"step": 660000
},
{
"epoch": 9.96,
"learning_rate": 1.0087128235276385e-05,
"loss": 0.0113,
"step": 660500
},
{
"epoch": 9.96,
"learning_rate": 1.007205414612822e-05,
"loss": 0.012,
"step": 661000
},
{
"epoch": 9.97,
"learning_rate": 1.0056980056980058e-05,
"loss": 0.0121,
"step": 661500
},
{
"epoch": 9.98,
"learning_rate": 1.0041905967831895e-05,
"loss": 0.0132,
"step": 662000
},
{
"epoch": 9.99,
"learning_rate": 1.0026831878683731e-05,
"loss": 0.0118,
"step": 662500
},
{
"epoch": 9.99,
"learning_rate": 1.0011757789535568e-05,
"loss": 0.0121,
"step": 663000
},
{
"epoch": 10.0,
"eval_accuracy": 0.9875053142522836,
"eval_f1": 0.9440039434689302,
"eval_loss": 0.05791113153100014,
"eval_precision": 0.9299003991141325,
"eval_recall": 0.9585418853919965,
"eval_runtime": 248.4287,
"eval_samples_per_second": 474.679,
"eval_steps_per_second": 29.67,
"step": 663390
},
{
"epoch": 10.0,
"learning_rate": 9.996683700387406e-06,
"loss": 0.0104,
"step": 663500
},
{
"epoch": 10.01,
"learning_rate": 9.981609611239242e-06,
"loss": 0.009,
"step": 664000
},
{
"epoch": 10.02,
"learning_rate": 9.966535522091079e-06,
"loss": 0.0099,
"step": 664500
},
{
"epoch": 10.02,
"learning_rate": 9.951461432942915e-06,
"loss": 0.01,
"step": 665000
},
{
"epoch": 10.03,
"learning_rate": 9.936387343794752e-06,
"loss": 0.01,
"step": 665500
},
{
"epoch": 10.04,
"learning_rate": 9.92131325464659e-06,
"loss": 0.0096,
"step": 666000
},
{
"epoch": 10.05,
"learning_rate": 9.906239165498425e-06,
"loss": 0.0098,
"step": 666500
},
{
"epoch": 10.05,
"learning_rate": 9.891165076350263e-06,
"loss": 0.0091,
"step": 667000
},
{
"epoch": 10.06,
"learning_rate": 9.8760909872021e-06,
"loss": 0.01,
"step": 667500
},
{
"epoch": 10.07,
"learning_rate": 9.861016898053936e-06,
"loss": 0.0095,
"step": 668000
},
{
"epoch": 10.08,
"learning_rate": 9.845942808905773e-06,
"loss": 0.0093,
"step": 668500
},
{
"epoch": 10.08,
"learning_rate": 9.830868719757609e-06,
"loss": 0.0096,
"step": 669000
},
{
"epoch": 10.09,
"learning_rate": 9.815794630609446e-06,
"loss": 0.0097,
"step": 669500
},
{
"epoch": 10.1,
"learning_rate": 9.800720541461284e-06,
"loss": 0.0099,
"step": 670000
},
{
"epoch": 10.11,
"learning_rate": 9.78564645231312e-06,
"loss": 0.0095,
"step": 670500
},
{
"epoch": 10.11,
"learning_rate": 9.770572363164957e-06,
"loss": 0.0088,
"step": 671000
},
{
"epoch": 10.12,
"learning_rate": 9.755498274016793e-06,
"loss": 0.01,
"step": 671500
},
{
"epoch": 10.13,
"learning_rate": 9.740424184868628e-06,
"loss": 0.0104,
"step": 672000
},
{
"epoch": 10.14,
"learning_rate": 9.725350095720466e-06,
"loss": 0.0102,
"step": 672500
},
{
"epoch": 10.14,
"learning_rate": 9.710276006572302e-06,
"loss": 0.0098,
"step": 673000
},
{
"epoch": 10.15,
"learning_rate": 9.695201917424139e-06,
"loss": 0.0102,
"step": 673500
},
{
"epoch": 10.16,
"learning_rate": 9.680127828275976e-06,
"loss": 0.0093,
"step": 674000
},
{
"epoch": 10.17,
"learning_rate": 9.665053739127812e-06,
"loss": 0.0099,
"step": 674500
},
{
"epoch": 10.18,
"learning_rate": 9.64997964997965e-06,
"loss": 0.0091,
"step": 675000
},
{
"epoch": 10.18,
"learning_rate": 9.634905560831487e-06,
"loss": 0.0099,
"step": 675500
},
{
"epoch": 10.19,
"learning_rate": 9.619831471683323e-06,
"loss": 0.0095,
"step": 676000
},
{
"epoch": 10.2,
"learning_rate": 9.60475738253516e-06,
"loss": 0.0102,
"step": 676500
},
{
"epoch": 10.21,
"learning_rate": 9.589683293386996e-06,
"loss": 0.0094,
"step": 677000
},
{
"epoch": 10.21,
"learning_rate": 9.574609204238833e-06,
"loss": 0.0106,
"step": 677500
},
{
"epoch": 10.22,
"learning_rate": 9.55953511509067e-06,
"loss": 0.0087,
"step": 678000
},
{
"epoch": 10.23,
"learning_rate": 9.544461025942506e-06,
"loss": 0.0107,
"step": 678500
},
{
"epoch": 10.24,
"learning_rate": 9.529386936794344e-06,
"loss": 0.0091,
"step": 679000
},
{
"epoch": 10.24,
"learning_rate": 9.51431284764618e-06,
"loss": 0.0097,
"step": 679500
},
{
"epoch": 10.25,
"learning_rate": 9.499238758498017e-06,
"loss": 0.0095,
"step": 680000
},
{
"epoch": 10.26,
"learning_rate": 9.484164669349854e-06,
"loss": 0.0096,
"step": 680500
},
{
"epoch": 10.27,
"learning_rate": 9.46909058020169e-06,
"loss": 0.01,
"step": 681000
},
{
"epoch": 10.27,
"learning_rate": 9.454016491053528e-06,
"loss": 0.0097,
"step": 681500
},
{
"epoch": 10.28,
"learning_rate": 9.438942401905365e-06,
"loss": 0.0091,
"step": 682000
},
{
"epoch": 10.29,
"learning_rate": 9.4238683127572e-06,
"loss": 0.01,
"step": 682500
},
{
"epoch": 10.3,
"learning_rate": 9.408794223609038e-06,
"loss": 0.0085,
"step": 683000
},
{
"epoch": 10.3,
"learning_rate": 9.393720134460874e-06,
"loss": 0.0099,
"step": 683500
},
{
"epoch": 10.31,
"learning_rate": 9.378646045312711e-06,
"loss": 0.0094,
"step": 684000
},
{
"epoch": 10.32,
"learning_rate": 9.363571956164549e-06,
"loss": 0.0082,
"step": 684500
},
{
"epoch": 10.33,
"learning_rate": 9.348497867016385e-06,
"loss": 0.0099,
"step": 685000
},
{
"epoch": 10.33,
"learning_rate": 9.333423777868222e-06,
"loss": 0.0096,
"step": 685500
},
{
"epoch": 10.34,
"learning_rate": 9.31834968872006e-06,
"loss": 0.0107,
"step": 686000
},
{
"epoch": 10.35,
"learning_rate": 9.303275599571895e-06,
"loss": 0.0095,
"step": 686500
},
{
"epoch": 10.36,
"learning_rate": 9.288201510423733e-06,
"loss": 0.01,
"step": 687000
},
{
"epoch": 10.36,
"learning_rate": 9.273127421275568e-06,
"loss": 0.009,
"step": 687500
},
{
"epoch": 10.37,
"learning_rate": 9.258053332127406e-06,
"loss": 0.0089,
"step": 688000
},
{
"epoch": 10.38,
"learning_rate": 9.242979242979243e-06,
"loss": 0.01,
"step": 688500
},
{
"epoch": 10.39,
"learning_rate": 9.227905153831079e-06,
"loss": 0.0091,
"step": 689000
},
{
"epoch": 10.39,
"learning_rate": 9.212831064682916e-06,
"loss": 0.0094,
"step": 689500
},
{
"epoch": 10.4,
"learning_rate": 9.197756975534754e-06,
"loss": 0.0097,
"step": 690000
},
{
"epoch": 10.41,
"learning_rate": 9.18268288638659e-06,
"loss": 0.0096,
"step": 690500
},
{
"epoch": 10.42,
"learning_rate": 9.167608797238427e-06,
"loss": 0.0103,
"step": 691000
},
{
"epoch": 10.42,
"learning_rate": 9.152534708090263e-06,
"loss": 0.0096,
"step": 691500
},
{
"epoch": 10.43,
"learning_rate": 9.1374606189421e-06,
"loss": 0.0093,
"step": 692000
},
{
"epoch": 10.44,
"learning_rate": 9.122386529793937e-06,
"loss": 0.0098,
"step": 692500
},
{
"epoch": 10.45,
"learning_rate": 9.107312440645773e-06,
"loss": 0.0102,
"step": 693000
},
{
"epoch": 10.45,
"learning_rate": 9.09223835149761e-06,
"loss": 0.0098,
"step": 693500
},
{
"epoch": 10.46,
"learning_rate": 9.077164262349448e-06,
"loss": 0.0097,
"step": 694000
},
{
"epoch": 10.47,
"learning_rate": 9.062090173201284e-06,
"loss": 0.0103,
"step": 694500
},
{
"epoch": 10.48,
"learning_rate": 9.047016084053121e-06,
"loss": 0.0099,
"step": 695000
},
{
"epoch": 10.48,
"learning_rate": 9.031941994904957e-06,
"loss": 0.0092,
"step": 695500
},
{
"epoch": 10.49,
"learning_rate": 9.016867905756794e-06,
"loss": 0.0093,
"step": 696000
},
{
"epoch": 10.5,
"learning_rate": 9.001793816608632e-06,
"loss": 0.0094,
"step": 696500
},
{
"epoch": 10.51,
"learning_rate": 8.986719727460467e-06,
"loss": 0.0097,
"step": 697000
},
{
"epoch": 10.51,
"learning_rate": 8.971645638312305e-06,
"loss": 0.0092,
"step": 697500
},
{
"epoch": 10.52,
"learning_rate": 8.956571549164142e-06,
"loss": 0.0107,
"step": 698000
},
{
"epoch": 10.53,
"learning_rate": 8.941497460015978e-06,
"loss": 0.01,
"step": 698500
},
{
"epoch": 10.54,
"learning_rate": 8.926423370867815e-06,
"loss": 0.0098,
"step": 699000
},
{
"epoch": 10.54,
"learning_rate": 8.911349281719651e-06,
"loss": 0.0093,
"step": 699500
},
{
"epoch": 10.55,
"learning_rate": 8.896275192571489e-06,
"loss": 0.0102,
"step": 700000
},
{
"epoch": 10.56,
"learning_rate": 8.881201103423326e-06,
"loss": 0.0098,
"step": 700500
},
{
"epoch": 10.57,
"learning_rate": 8.866127014275162e-06,
"loss": 0.0106,
"step": 701000
},
{
"epoch": 10.57,
"learning_rate": 8.851052925126999e-06,
"loss": 0.0095,
"step": 701500
},
{
"epoch": 10.58,
"learning_rate": 8.835978835978837e-06,
"loss": 0.0105,
"step": 702000
},
{
"epoch": 10.59,
"learning_rate": 8.820904746830672e-06,
"loss": 0.0105,
"step": 702500
},
{
"epoch": 10.6,
"learning_rate": 8.80583065768251e-06,
"loss": 0.0103,
"step": 703000
},
{
"epoch": 10.6,
"learning_rate": 8.790756568534345e-06,
"loss": 0.0101,
"step": 703500
},
{
"epoch": 10.61,
"learning_rate": 8.775682479386183e-06,
"loss": 0.0095,
"step": 704000
},
{
"epoch": 10.62,
"learning_rate": 8.76060839023802e-06,
"loss": 0.0103,
"step": 704500
},
{
"epoch": 10.63,
"learning_rate": 8.745534301089856e-06,
"loss": 0.0102,
"step": 705000
},
{
"epoch": 10.63,
"learning_rate": 8.730460211941693e-06,
"loss": 0.0102,
"step": 705500
},
{
"epoch": 10.64,
"learning_rate": 8.715386122793529e-06,
"loss": 0.01,
"step": 706000
},
{
"epoch": 10.65,
"learning_rate": 8.700312033645367e-06,
"loss": 0.0098,
"step": 706500
},
{
"epoch": 10.66,
"learning_rate": 8.685237944497204e-06,
"loss": 0.0091,
"step": 707000
},
{
"epoch": 10.66,
"learning_rate": 8.67016385534904e-06,
"loss": 0.0106,
"step": 707500
},
{
"epoch": 10.67,
"learning_rate": 8.655089766200877e-06,
"loss": 0.0102,
"step": 708000
},
{
"epoch": 10.68,
"learning_rate": 8.640015677052715e-06,
"loss": 0.0094,
"step": 708500
},
{
"epoch": 10.69,
"learning_rate": 8.62494158790455e-06,
"loss": 0.0101,
"step": 709000
},
{
"epoch": 10.7,
"learning_rate": 8.609867498756388e-06,
"loss": 0.0099,
"step": 709500
},
{
"epoch": 10.7,
"learning_rate": 8.594793409608223e-06,
"loss": 0.0095,
"step": 710000
},
{
"epoch": 10.71,
"learning_rate": 8.57971932046006e-06,
"loss": 0.0098,
"step": 710500
},
{
"epoch": 10.72,
"learning_rate": 8.564645231311898e-06,
"loss": 0.0094,
"step": 711000
},
{
"epoch": 10.73,
"learning_rate": 8.549571142163734e-06,
"loss": 0.0097,
"step": 711500
},
{
"epoch": 10.73,
"learning_rate": 8.534497053015571e-06,
"loss": 0.0099,
"step": 712000
},
{
"epoch": 10.74,
"learning_rate": 8.519422963867409e-06,
"loss": 0.0104,
"step": 712500
},
{
"epoch": 10.75,
"learning_rate": 8.504348874719245e-06,
"loss": 0.0086,
"step": 713000
},
{
"epoch": 10.76,
"learning_rate": 8.489274785571082e-06,
"loss": 0.0099,
"step": 713500
},
{
"epoch": 10.76,
"learning_rate": 8.474200696422918e-06,
"loss": 0.0101,
"step": 714000
},
{
"epoch": 10.77,
"learning_rate": 8.459126607274755e-06,
"loss": 0.0099,
"step": 714500
},
{
"epoch": 10.78,
"learning_rate": 8.444052518126593e-06,
"loss": 0.0097,
"step": 715000
},
{
"epoch": 10.79,
"learning_rate": 8.428978428978428e-06,
"loss": 0.0093,
"step": 715500
},
{
"epoch": 10.79,
"learning_rate": 8.413904339830266e-06,
"loss": 0.0105,
"step": 716000
},
{
"epoch": 10.8,
"learning_rate": 8.398830250682103e-06,
"loss": 0.0086,
"step": 716500
},
{
"epoch": 10.81,
"learning_rate": 8.383756161533939e-06,
"loss": 0.0105,
"step": 717000
},
{
"epoch": 10.82,
"learning_rate": 8.368682072385776e-06,
"loss": 0.0105,
"step": 717500
},
{
"epoch": 10.82,
"learning_rate": 8.353607983237612e-06,
"loss": 0.0094,
"step": 718000
},
{
"epoch": 10.83,
"learning_rate": 8.33853389408945e-06,
"loss": 0.0098,
"step": 718500
},
{
"epoch": 10.84,
"learning_rate": 8.323459804941287e-06,
"loss": 0.0095,
"step": 719000
},
{
"epoch": 10.85,
"learning_rate": 8.308385715793123e-06,
"loss": 0.01,
"step": 719500
},
{
"epoch": 10.85,
"learning_rate": 8.29331162664496e-06,
"loss": 0.0091,
"step": 720000
},
{
"epoch": 10.86,
"learning_rate": 8.278237537496797e-06,
"loss": 0.0095,
"step": 720500
},
{
"epoch": 10.87,
"learning_rate": 8.263163448348633e-06,
"loss": 0.0107,
"step": 721000
},
{
"epoch": 10.88,
"learning_rate": 8.24808935920047e-06,
"loss": 0.011,
"step": 721500
},
{
"epoch": 10.88,
"learning_rate": 8.233015270052306e-06,
"loss": 0.0083,
"step": 722000
},
{
"epoch": 10.89,
"learning_rate": 8.217941180904144e-06,
"loss": 0.0102,
"step": 722500
},
{
"epoch": 10.9,
"learning_rate": 8.202867091755981e-06,
"loss": 0.0101,
"step": 723000
},
{
"epoch": 10.91,
"learning_rate": 8.187793002607817e-06,
"loss": 0.0105,
"step": 723500
},
{
"epoch": 10.91,
"learning_rate": 8.172718913459654e-06,
"loss": 0.0105,
"step": 724000
},
{
"epoch": 10.92,
"learning_rate": 8.157644824311492e-06,
"loss": 0.0101,
"step": 724500
},
{
"epoch": 10.93,
"learning_rate": 8.142570735163327e-06,
"loss": 0.0095,
"step": 725000
},
{
"epoch": 10.94,
"learning_rate": 8.127496646015165e-06,
"loss": 0.0101,
"step": 725500
},
{
"epoch": 10.94,
"learning_rate": 8.112422556867e-06,
"loss": 0.0109,
"step": 726000
},
{
"epoch": 10.95,
"learning_rate": 8.097348467718838e-06,
"loss": 0.0097,
"step": 726500
},
{
"epoch": 10.96,
"learning_rate": 8.082274378570675e-06,
"loss": 0.0093,
"step": 727000
},
{
"epoch": 10.97,
"learning_rate": 8.067200289422511e-06,
"loss": 0.01,
"step": 727500
},
{
"epoch": 10.97,
"learning_rate": 8.052126200274349e-06,
"loss": 0.0089,
"step": 728000
},
{
"epoch": 10.98,
"learning_rate": 8.037052111126186e-06,
"loss": 0.0101,
"step": 728500
},
{
"epoch": 10.99,
"learning_rate": 8.021978021978022e-06,
"loss": 0.0101,
"step": 729000
},
{
"epoch": 11.0,
"learning_rate": 8.00690393282986e-06,
"loss": 0.0099,
"step": 729500
},
{
"epoch": 11.0,
"eval_accuracy": 0.9879735586773043,
"eval_f1": 0.9489221995763095,
"eval_loss": 0.06842657923698425,
"eval_precision": 0.9366573774195978,
"eval_recall": 0.9615124805690521,
"eval_runtime": 214.3851,
"eval_samples_per_second": 550.057,
"eval_steps_per_second": 34.382,
"step": 729729
},
{
"epoch": 11.0,
"learning_rate": 7.991829843681695e-06,
"loss": 0.0083,
"step": 730000
},
{
"epoch": 11.01,
"learning_rate": 7.976755754533532e-06,
"loss": 0.0087,
"step": 730500
},
{
"epoch": 11.02,
"learning_rate": 7.96168166538537e-06,
"loss": 0.0079,
"step": 731000
},
{
"epoch": 11.03,
"learning_rate": 7.946607576237205e-06,
"loss": 0.0077,
"step": 731500
},
{
"epoch": 11.03,
"learning_rate": 7.931533487089043e-06,
"loss": 0.0071,
"step": 732000
},
{
"epoch": 11.04,
"learning_rate": 7.91645939794088e-06,
"loss": 0.0078,
"step": 732500
},
{
"epoch": 11.05,
"learning_rate": 7.901385308792716e-06,
"loss": 0.0075,
"step": 733000
},
{
"epoch": 11.06,
"learning_rate": 7.886311219644553e-06,
"loss": 0.008,
"step": 733500
},
{
"epoch": 11.06,
"learning_rate": 7.87123713049639e-06,
"loss": 0.0079,
"step": 734000
},
{
"epoch": 11.07,
"learning_rate": 7.856163041348227e-06,
"loss": 0.0077,
"step": 734500
},
{
"epoch": 11.08,
"learning_rate": 7.841088952200064e-06,
"loss": 0.0082,
"step": 735000
},
{
"epoch": 11.09,
"learning_rate": 7.8260148630519e-06,
"loss": 0.0084,
"step": 735500
},
{
"epoch": 11.09,
"learning_rate": 7.810940773903737e-06,
"loss": 0.0078,
"step": 736000
},
{
"epoch": 11.1,
"learning_rate": 7.795866684755573e-06,
"loss": 0.0079,
"step": 736500
},
{
"epoch": 11.11,
"learning_rate": 7.78079259560741e-06,
"loss": 0.0078,
"step": 737000
},
{
"epoch": 11.12,
"learning_rate": 7.765718506459248e-06,
"loss": 0.0079,
"step": 737500
},
{
"epoch": 11.12,
"learning_rate": 7.750644417311083e-06,
"loss": 0.0075,
"step": 738000
},
{
"epoch": 11.13,
"learning_rate": 7.735570328162921e-06,
"loss": 0.0075,
"step": 738500
},
{
"epoch": 11.14,
"learning_rate": 7.720496239014758e-06,
"loss": 0.0076,
"step": 739000
},
{
"epoch": 11.15,
"learning_rate": 7.705422149866594e-06,
"loss": 0.0085,
"step": 739500
},
{
"epoch": 11.15,
"learning_rate": 7.690348060718431e-06,
"loss": 0.0082,
"step": 740000
},
{
"epoch": 11.16,
"learning_rate": 7.675273971570267e-06,
"loss": 0.0072,
"step": 740500
},
{
"epoch": 11.17,
"learning_rate": 7.660199882422105e-06,
"loss": 0.0083,
"step": 741000
},
{
"epoch": 11.18,
"learning_rate": 7.645125793273942e-06,
"loss": 0.0073,
"step": 741500
},
{
"epoch": 11.18,
"learning_rate": 7.630051704125778e-06,
"loss": 0.0079,
"step": 742000
},
{
"epoch": 11.19,
"learning_rate": 7.614977614977615e-06,
"loss": 0.0077,
"step": 742500
},
{
"epoch": 11.2,
"learning_rate": 7.599903525829452e-06,
"loss": 0.0095,
"step": 743000
},
{
"epoch": 11.21,
"learning_rate": 7.584829436681288e-06,
"loss": 0.0088,
"step": 743500
},
{
"epoch": 11.22,
"learning_rate": 7.569755347533126e-06,
"loss": 0.0084,
"step": 744000
},
{
"epoch": 11.22,
"learning_rate": 7.554681258384962e-06,
"loss": 0.0087,
"step": 744500
},
{
"epoch": 11.23,
"learning_rate": 7.539607169236799e-06,
"loss": 0.0079,
"step": 745000
},
{
"epoch": 11.24,
"learning_rate": 7.5245330800886355e-06,
"loss": 0.0093,
"step": 745500
},
{
"epoch": 11.25,
"learning_rate": 7.509458990940473e-06,
"loss": 0.0079,
"step": 746000
},
{
"epoch": 11.25,
"learning_rate": 7.4943849017923095e-06,
"loss": 0.0079,
"step": 746500
},
{
"epoch": 11.26,
"learning_rate": 7.479310812644146e-06,
"loss": 0.0082,
"step": 747000
},
{
"epoch": 11.27,
"learning_rate": 7.464236723495983e-06,
"loss": 0.0082,
"step": 747500
},
{
"epoch": 11.28,
"learning_rate": 7.44916263434782e-06,
"loss": 0.0083,
"step": 748000
},
{
"epoch": 11.28,
"learning_rate": 7.434088545199657e-06,
"loss": 0.008,
"step": 748500
},
{
"epoch": 11.29,
"learning_rate": 7.419014456051493e-06,
"loss": 0.0091,
"step": 749000
},
{
"epoch": 11.3,
"learning_rate": 7.40394036690333e-06,
"loss": 0.0079,
"step": 749500
},
{
"epoch": 11.31,
"learning_rate": 7.388866277755167e-06,
"loss": 0.0079,
"step": 750000
},
{
"epoch": 11.31,
"learning_rate": 7.373792188607004e-06,
"loss": 0.0076,
"step": 750500
},
{
"epoch": 11.32,
"learning_rate": 7.35871809945884e-06,
"loss": 0.0083,
"step": 751000
},
{
"epoch": 11.33,
"learning_rate": 7.343644010310677e-06,
"loss": 0.0083,
"step": 751500
},
{
"epoch": 11.34,
"learning_rate": 7.328569921162514e-06,
"loss": 0.0088,
"step": 752000
},
{
"epoch": 11.34,
"learning_rate": 7.313495832014351e-06,
"loss": 0.0085,
"step": 752500
},
{
"epoch": 11.35,
"learning_rate": 7.2984217428661875e-06,
"loss": 0.0078,
"step": 753000
},
{
"epoch": 11.36,
"learning_rate": 7.283347653718024e-06,
"loss": 0.009,
"step": 753500
},
{
"epoch": 11.37,
"learning_rate": 7.2682735645698615e-06,
"loss": 0.0084,
"step": 754000
},
{
"epoch": 11.37,
"learning_rate": 7.253199475421698e-06,
"loss": 0.0077,
"step": 754500
},
{
"epoch": 11.38,
"learning_rate": 7.238125386273535e-06,
"loss": 0.0082,
"step": 755000
},
{
"epoch": 11.39,
"learning_rate": 7.223051297125371e-06,
"loss": 0.0089,
"step": 755500
},
{
"epoch": 11.4,
"learning_rate": 7.207977207977209e-06,
"loss": 0.0083,
"step": 756000
},
{
"epoch": 11.4,
"learning_rate": 7.192903118829045e-06,
"loss": 0.0077,
"step": 756500
},
{
"epoch": 11.41,
"learning_rate": 7.177829029680882e-06,
"loss": 0.0089,
"step": 757000
},
{
"epoch": 11.42,
"learning_rate": 7.162754940532718e-06,
"loss": 0.0083,
"step": 757500
},
{
"epoch": 11.43,
"learning_rate": 7.147680851384556e-06,
"loss": 0.0082,
"step": 758000
},
{
"epoch": 11.43,
"learning_rate": 7.132606762236392e-06,
"loss": 0.0076,
"step": 758500
},
{
"epoch": 11.44,
"learning_rate": 7.117532673088229e-06,
"loss": 0.0091,
"step": 759000
},
{
"epoch": 11.45,
"learning_rate": 7.1024585839400655e-06,
"loss": 0.0091,
"step": 759500
},
{
"epoch": 11.46,
"learning_rate": 7.087384494791903e-06,
"loss": 0.0086,
"step": 760000
},
{
"epoch": 11.46,
"learning_rate": 7.0723104056437395e-06,
"loss": 0.0082,
"step": 760500
},
{
"epoch": 11.47,
"learning_rate": 7.057236316495576e-06,
"loss": 0.0085,
"step": 761000
},
{
"epoch": 11.48,
"learning_rate": 7.042162227347413e-06,
"loss": 0.0093,
"step": 761500
},
{
"epoch": 11.49,
"learning_rate": 7.02708813819925e-06,
"loss": 0.0082,
"step": 762000
},
{
"epoch": 11.49,
"learning_rate": 7.012014049051087e-06,
"loss": 0.0081,
"step": 762500
},
{
"epoch": 11.5,
"learning_rate": 6.996939959902923e-06,
"loss": 0.0088,
"step": 763000
},
{
"epoch": 11.51,
"learning_rate": 6.98186587075476e-06,
"loss": 0.0084,
"step": 763500
},
{
"epoch": 11.52,
"learning_rate": 6.966791781606596e-06,
"loss": 0.0084,
"step": 764000
},
{
"epoch": 11.52,
"learning_rate": 6.951717692458434e-06,
"loss": 0.0083,
"step": 764500
},
{
"epoch": 11.53,
"learning_rate": 6.93664360331027e-06,
"loss": 0.0081,
"step": 765000
},
{
"epoch": 11.54,
"learning_rate": 6.921569514162107e-06,
"loss": 0.0086,
"step": 765500
},
{
"epoch": 11.55,
"learning_rate": 6.9064954250139435e-06,
"loss": 0.0079,
"step": 766000
},
{
"epoch": 11.55,
"learning_rate": 6.891421335865781e-06,
"loss": 0.0077,
"step": 766500
},
{
"epoch": 11.56,
"learning_rate": 6.8763472467176175e-06,
"loss": 0.0077,
"step": 767000
},
{
"epoch": 11.57,
"learning_rate": 6.861273157569454e-06,
"loss": 0.0083,
"step": 767500
},
{
"epoch": 11.58,
"learning_rate": 6.846199068421291e-06,
"loss": 0.0086,
"step": 768000
},
{
"epoch": 11.58,
"learning_rate": 6.831124979273128e-06,
"loss": 0.008,
"step": 768500
},
{
"epoch": 11.59,
"learning_rate": 6.816050890124965e-06,
"loss": 0.009,
"step": 769000
},
{
"epoch": 11.6,
"learning_rate": 6.800976800976801e-06,
"loss": 0.0089,
"step": 769500
},
{
"epoch": 11.61,
"learning_rate": 6.785902711828638e-06,
"loss": 0.0079,
"step": 770000
},
{
"epoch": 11.61,
"learning_rate": 6.770828622680475e-06,
"loss": 0.009,
"step": 770500
},
{
"epoch": 11.62,
"learning_rate": 6.755754533532312e-06,
"loss": 0.0078,
"step": 771000
},
{
"epoch": 11.63,
"learning_rate": 6.740680444384148e-06,
"loss": 0.0082,
"step": 771500
},
{
"epoch": 11.64,
"learning_rate": 6.725606355235985e-06,
"loss": 0.0087,
"step": 772000
},
{
"epoch": 11.64,
"learning_rate": 6.710532266087822e-06,
"loss": 0.0078,
"step": 772500
},
{
"epoch": 11.65,
"learning_rate": 6.695458176939659e-06,
"loss": 0.0084,
"step": 773000
},
{
"epoch": 11.66,
"learning_rate": 6.6803840877914955e-06,
"loss": 0.0087,
"step": 773500
},
{
"epoch": 11.67,
"learning_rate": 6.665309998643332e-06,
"loss": 0.009,
"step": 774000
},
{
"epoch": 11.67,
"learning_rate": 6.6502359094951695e-06,
"loss": 0.0086,
"step": 774500
},
{
"epoch": 11.68,
"learning_rate": 6.635161820347006e-06,
"loss": 0.0084,
"step": 775000
},
{
"epoch": 11.69,
"learning_rate": 6.620087731198843e-06,
"loss": 0.0088,
"step": 775500
},
{
"epoch": 11.7,
"learning_rate": 6.605013642050679e-06,
"loss": 0.0088,
"step": 776000
},
{
"epoch": 11.71,
"learning_rate": 6.589939552902517e-06,
"loss": 0.0091,
"step": 776500
},
{
"epoch": 11.71,
"learning_rate": 6.574865463754353e-06,
"loss": 0.0087,
"step": 777000
},
{
"epoch": 11.72,
"learning_rate": 6.55979137460619e-06,
"loss": 0.0082,
"step": 777500
},
{
"epoch": 11.73,
"learning_rate": 6.544717285458026e-06,
"loss": 0.008,
"step": 778000
},
{
"epoch": 11.74,
"learning_rate": 6.529643196309864e-06,
"loss": 0.0079,
"step": 778500
},
{
"epoch": 11.74,
"learning_rate": 6.5145691071617e-06,
"loss": 0.0076,
"step": 779000
},
{
"epoch": 11.75,
"learning_rate": 6.499495018013537e-06,
"loss": 0.0078,
"step": 779500
},
{
"epoch": 11.76,
"learning_rate": 6.4844209288653736e-06,
"loss": 0.0079,
"step": 780000
},
{
"epoch": 11.77,
"learning_rate": 6.469346839717211e-06,
"loss": 0.009,
"step": 780500
},
{
"epoch": 11.77,
"learning_rate": 6.4542727505690476e-06,
"loss": 0.0077,
"step": 781000
},
{
"epoch": 11.78,
"learning_rate": 6.439198661420884e-06,
"loss": 0.0092,
"step": 781500
},
{
"epoch": 11.79,
"learning_rate": 6.424124572272721e-06,
"loss": 0.0077,
"step": 782000
},
{
"epoch": 11.8,
"learning_rate": 6.409050483124558e-06,
"loss": 0.0082,
"step": 782500
},
{
"epoch": 11.8,
"learning_rate": 6.393976393976395e-06,
"loss": 0.0089,
"step": 783000
},
{
"epoch": 11.81,
"learning_rate": 6.3789023048282304e-06,
"loss": 0.0087,
"step": 783500
},
{
"epoch": 11.82,
"learning_rate": 6.363828215680067e-06,
"loss": 0.0087,
"step": 784000
},
{
"epoch": 11.83,
"learning_rate": 6.3487541265319044e-06,
"loss": 0.0083,
"step": 784500
},
{
"epoch": 11.83,
"learning_rate": 6.333680037383741e-06,
"loss": 0.0083,
"step": 785000
},
{
"epoch": 11.84,
"learning_rate": 6.3186059482355776e-06,
"loss": 0.0077,
"step": 785500
},
{
"epoch": 11.85,
"learning_rate": 6.303531859087414e-06,
"loss": 0.0089,
"step": 786000
},
{
"epoch": 11.86,
"learning_rate": 6.2884577699392516e-06,
"loss": 0.0078,
"step": 786500
},
{
"epoch": 11.86,
"learning_rate": 6.273383680791088e-06,
"loss": 0.008,
"step": 787000
},
{
"epoch": 11.87,
"learning_rate": 6.258309591642925e-06,
"loss": 0.0088,
"step": 787500
},
{
"epoch": 11.88,
"learning_rate": 6.243235502494761e-06,
"loss": 0.0076,
"step": 788000
},
{
"epoch": 11.89,
"learning_rate": 6.228161413346599e-06,
"loss": 0.0084,
"step": 788500
},
{
"epoch": 11.89,
"learning_rate": 6.213087324198435e-06,
"loss": 0.0086,
"step": 789000
},
{
"epoch": 11.9,
"learning_rate": 6.198013235050272e-06,
"loss": 0.0094,
"step": 789500
},
{
"epoch": 11.91,
"learning_rate": 6.1829391459021084e-06,
"loss": 0.0085,
"step": 790000
},
{
"epoch": 11.92,
"learning_rate": 6.167865056753946e-06,
"loss": 0.0088,
"step": 790500
},
{
"epoch": 11.92,
"learning_rate": 6.1527909676057824e-06,
"loss": 0.0088,
"step": 791000
},
{
"epoch": 11.93,
"learning_rate": 6.137716878457619e-06,
"loss": 0.0082,
"step": 791500
},
{
"epoch": 11.94,
"learning_rate": 6.122642789309456e-06,
"loss": 0.0078,
"step": 792000
},
{
"epoch": 11.95,
"learning_rate": 6.107568700161293e-06,
"loss": 0.0084,
"step": 792500
},
{
"epoch": 11.95,
"learning_rate": 6.09249461101313e-06,
"loss": 0.0089,
"step": 793000
},
{
"epoch": 11.96,
"learning_rate": 6.077420521864966e-06,
"loss": 0.0079,
"step": 793500
},
{
"epoch": 11.97,
"learning_rate": 6.062346432716803e-06,
"loss": 0.0084,
"step": 794000
},
{
"epoch": 11.98,
"learning_rate": 6.047272343568639e-06,
"loss": 0.0087,
"step": 794500
},
{
"epoch": 11.98,
"learning_rate": 6.032198254420477e-06,
"loss": 0.0084,
"step": 795000
},
{
"epoch": 11.99,
"learning_rate": 6.017124165272313e-06,
"loss": 0.0084,
"step": 795500
},
{
"epoch": 12.0,
"learning_rate": 6.00205007612415e-06,
"loss": 0.009,
"step": 796000
},
{
"epoch": 12.0,
"eval_accuracy": 0.9879435109067147,
"eval_f1": 0.9494733505974576,
"eval_loss": 0.06574396789073944,
"eval_precision": 0.9362427656485499,
"eval_recall": 0.963083233635276,
"eval_runtime": 225.9481,
"eval_samples_per_second": 521.907,
"eval_steps_per_second": 32.623,
"step": 796068
},
{
"epoch": 12.01,
"learning_rate": 5.9869759869759865e-06,
"loss": 0.0065,
"step": 796500
},
{
"epoch": 12.01,
"learning_rate": 5.971901897827824e-06,
"loss": 0.0072,
"step": 797000
},
{
"epoch": 12.02,
"learning_rate": 5.9568278086796605e-06,
"loss": 0.0067,
"step": 797500
},
{
"epoch": 12.03,
"learning_rate": 5.941753719531497e-06,
"loss": 0.0071,
"step": 798000
},
{
"epoch": 12.04,
"learning_rate": 5.926679630383334e-06,
"loss": 0.007,
"step": 798500
},
{
"epoch": 12.04,
"learning_rate": 5.911605541235171e-06,
"loss": 0.0073,
"step": 799000
},
{
"epoch": 12.05,
"learning_rate": 5.896531452087008e-06,
"loss": 0.0076,
"step": 799500
},
{
"epoch": 12.06,
"learning_rate": 5.881457362938844e-06,
"loss": 0.0075,
"step": 800000
},
{
"epoch": 12.07,
"learning_rate": 5.866383273790681e-06,
"loss": 0.0065,
"step": 800500
},
{
"epoch": 12.07,
"learning_rate": 5.851309184642518e-06,
"loss": 0.0072,
"step": 801000
},
{
"epoch": 12.08,
"learning_rate": 5.836235095494355e-06,
"loss": 0.0073,
"step": 801500
},
{
"epoch": 12.09,
"learning_rate": 5.821161006346191e-06,
"loss": 0.0072,
"step": 802000
},
{
"epoch": 12.1,
"learning_rate": 5.806086917198028e-06,
"loss": 0.0068,
"step": 802500
},
{
"epoch": 12.1,
"learning_rate": 5.791012828049865e-06,
"loss": 0.0068,
"step": 803000
},
{
"epoch": 12.11,
"learning_rate": 5.775938738901702e-06,
"loss": 0.0072,
"step": 803500
},
{
"epoch": 12.12,
"learning_rate": 5.7608646497535385e-06,
"loss": 0.0081,
"step": 804000
},
{
"epoch": 12.13,
"learning_rate": 5.745790560605375e-06,
"loss": 0.0075,
"step": 804500
},
{
"epoch": 12.13,
"learning_rate": 5.7307164714572125e-06,
"loss": 0.0069,
"step": 805000
},
{
"epoch": 12.14,
"learning_rate": 5.715642382309049e-06,
"loss": 0.0073,
"step": 805500
},
{
"epoch": 12.15,
"learning_rate": 5.700568293160886e-06,
"loss": 0.0072,
"step": 806000
},
{
"epoch": 12.16,
"learning_rate": 5.685494204012722e-06,
"loss": 0.007,
"step": 806500
},
{
"epoch": 12.16,
"learning_rate": 5.67042011486456e-06,
"loss": 0.0071,
"step": 807000
},
{
"epoch": 12.17,
"learning_rate": 5.655346025716396e-06,
"loss": 0.0071,
"step": 807500
},
{
"epoch": 12.18,
"learning_rate": 5.640271936568233e-06,
"loss": 0.007,
"step": 808000
},
{
"epoch": 12.19,
"learning_rate": 5.625197847420069e-06,
"loss": 0.0066,
"step": 808500
},
{
"epoch": 12.19,
"learning_rate": 5.610123758271907e-06,
"loss": 0.0073,
"step": 809000
},
{
"epoch": 12.2,
"learning_rate": 5.595049669123743e-06,
"loss": 0.0067,
"step": 809500
},
{
"epoch": 12.21,
"learning_rate": 5.57997557997558e-06,
"loss": 0.0073,
"step": 810000
},
{
"epoch": 12.22,
"learning_rate": 5.5649014908274165e-06,
"loss": 0.0071,
"step": 810500
},
{
"epoch": 12.23,
"learning_rate": 5.549827401679254e-06,
"loss": 0.0075,
"step": 811000
},
{
"epoch": 12.23,
"learning_rate": 5.5347533125310905e-06,
"loss": 0.0074,
"step": 811500
},
{
"epoch": 12.24,
"learning_rate": 5.519679223382927e-06,
"loss": 0.0069,
"step": 812000
},
{
"epoch": 12.25,
"learning_rate": 5.504605134234764e-06,
"loss": 0.0064,
"step": 812500
},
{
"epoch": 12.26,
"learning_rate": 5.489531045086601e-06,
"loss": 0.0067,
"step": 813000
},
{
"epoch": 12.26,
"learning_rate": 5.474456955938438e-06,
"loss": 0.0068,
"step": 813500
},
{
"epoch": 12.27,
"learning_rate": 5.459382866790274e-06,
"loss": 0.0066,
"step": 814000
},
{
"epoch": 12.28,
"learning_rate": 5.444308777642111e-06,
"loss": 0.0069,
"step": 814500
},
{
"epoch": 12.29,
"learning_rate": 5.429234688493948e-06,
"loss": 0.0075,
"step": 815000
},
{
"epoch": 12.29,
"learning_rate": 5.414160599345785e-06,
"loss": 0.0082,
"step": 815500
},
{
"epoch": 12.3,
"learning_rate": 5.399086510197621e-06,
"loss": 0.0078,
"step": 816000
},
{
"epoch": 12.31,
"learning_rate": 5.384012421049458e-06,
"loss": 0.0071,
"step": 816500
},
{
"epoch": 12.32,
"learning_rate": 5.368938331901295e-06,
"loss": 0.0076,
"step": 817000
},
{
"epoch": 12.32,
"learning_rate": 5.353864242753132e-06,
"loss": 0.0071,
"step": 817500
},
{
"epoch": 12.33,
"learning_rate": 5.3387901536049685e-06,
"loss": 0.0069,
"step": 818000
},
{
"epoch": 12.34,
"learning_rate": 5.323716064456805e-06,
"loss": 0.0068,
"step": 818500
},
{
"epoch": 12.35,
"learning_rate": 5.3086419753086425e-06,
"loss": 0.0067,
"step": 819000
},
{
"epoch": 12.35,
"learning_rate": 5.293567886160479e-06,
"loss": 0.0067,
"step": 819500
},
{
"epoch": 12.36,
"learning_rate": 5.278493797012316e-06,
"loss": 0.0075,
"step": 820000
},
{
"epoch": 12.37,
"learning_rate": 5.263419707864152e-06,
"loss": 0.0067,
"step": 820500
},
{
"epoch": 12.38,
"learning_rate": 5.24834561871599e-06,
"loss": 0.0072,
"step": 821000
},
{
"epoch": 12.38,
"learning_rate": 5.233271529567826e-06,
"loss": 0.0071,
"step": 821500
},
{
"epoch": 12.39,
"learning_rate": 5.218197440419663e-06,
"loss": 0.0073,
"step": 822000
},
{
"epoch": 12.4,
"learning_rate": 5.203123351271499e-06,
"loss": 0.0068,
"step": 822500
},
{
"epoch": 12.41,
"learning_rate": 5.188049262123336e-06,
"loss": 0.0079,
"step": 823000
},
{
"epoch": 12.41,
"learning_rate": 5.172975172975173e-06,
"loss": 0.0076,
"step": 823500
},
{
"epoch": 12.42,
"learning_rate": 5.15790108382701e-06,
"loss": 0.0066,
"step": 824000
},
{
"epoch": 12.43,
"learning_rate": 5.1428269946788465e-06,
"loss": 0.0078,
"step": 824500
},
{
"epoch": 12.44,
"learning_rate": 5.127752905530683e-06,
"loss": 0.0067,
"step": 825000
},
{
"epoch": 12.44,
"learning_rate": 5.1126788163825205e-06,
"loss": 0.0069,
"step": 825500
},
{
"epoch": 12.45,
"learning_rate": 5.097604727234357e-06,
"loss": 0.0077,
"step": 826000
},
{
"epoch": 12.46,
"learning_rate": 5.082530638086194e-06,
"loss": 0.0066,
"step": 826500
},
{
"epoch": 12.47,
"learning_rate": 5.06745654893803e-06,
"loss": 0.0082,
"step": 827000
},
{
"epoch": 12.47,
"learning_rate": 5.052382459789868e-06,
"loss": 0.0064,
"step": 827500
},
{
"epoch": 12.48,
"learning_rate": 5.037308370641704e-06,
"loss": 0.007,
"step": 828000
},
{
"epoch": 12.49,
"learning_rate": 5.022234281493541e-06,
"loss": 0.0072,
"step": 828500
},
{
"epoch": 12.5,
"learning_rate": 5.007160192345377e-06,
"loss": 0.0076,
"step": 829000
},
{
"epoch": 12.5,
"learning_rate": 4.992086103197215e-06,
"loss": 0.0075,
"step": 829500
},
{
"epoch": 12.51,
"learning_rate": 4.977012014049051e-06,
"loss": 0.0072,
"step": 830000
},
{
"epoch": 12.52,
"learning_rate": 4.961937924900888e-06,
"loss": 0.0069,
"step": 830500
},
{
"epoch": 12.53,
"learning_rate": 4.9468638357527245e-06,
"loss": 0.0068,
"step": 831000
},
{
"epoch": 12.53,
"learning_rate": 4.931789746604562e-06,
"loss": 0.0068,
"step": 831500
},
{
"epoch": 12.54,
"learning_rate": 4.9167156574563985e-06,
"loss": 0.0075,
"step": 832000
},
{
"epoch": 12.55,
"learning_rate": 4.901641568308235e-06,
"loss": 0.0077,
"step": 832500
},
{
"epoch": 12.56,
"learning_rate": 4.886567479160072e-06,
"loss": 0.0072,
"step": 833000
},
{
"epoch": 12.56,
"learning_rate": 4.871493390011909e-06,
"loss": 0.0076,
"step": 833500
},
{
"epoch": 12.57,
"learning_rate": 4.856419300863746e-06,
"loss": 0.0067,
"step": 834000
},
{
"epoch": 12.58,
"learning_rate": 4.841345211715582e-06,
"loss": 0.0075,
"step": 834500
},
{
"epoch": 12.59,
"learning_rate": 4.826271122567419e-06,
"loss": 0.0076,
"step": 835000
},
{
"epoch": 12.59,
"learning_rate": 4.811197033419256e-06,
"loss": 0.0068,
"step": 835500
},
{
"epoch": 12.6,
"learning_rate": 4.796122944271093e-06,
"loss": 0.0071,
"step": 836000
},
{
"epoch": 12.61,
"learning_rate": 4.781048855122929e-06,
"loss": 0.0074,
"step": 836500
},
{
"epoch": 12.62,
"learning_rate": 4.765974765974766e-06,
"loss": 0.0083,
"step": 837000
},
{
"epoch": 12.62,
"learning_rate": 4.750900676826603e-06,
"loss": 0.0069,
"step": 837500
},
{
"epoch": 12.63,
"learning_rate": 4.73582658767844e-06,
"loss": 0.0077,
"step": 838000
},
{
"epoch": 12.64,
"learning_rate": 4.7207524985302765e-06,
"loss": 0.0073,
"step": 838500
},
{
"epoch": 12.65,
"learning_rate": 4.705678409382113e-06,
"loss": 0.0068,
"step": 839000
},
{
"epoch": 12.65,
"learning_rate": 4.6906043202339505e-06,
"loss": 0.0075,
"step": 839500
},
{
"epoch": 12.66,
"learning_rate": 4.675530231085787e-06,
"loss": 0.0068,
"step": 840000
},
{
"epoch": 12.67,
"learning_rate": 4.660456141937624e-06,
"loss": 0.0067,
"step": 840500
},
{
"epoch": 12.68,
"learning_rate": 4.64538205278946e-06,
"loss": 0.0067,
"step": 841000
},
{
"epoch": 12.68,
"learning_rate": 4.630307963641298e-06,
"loss": 0.0073,
"step": 841500
},
{
"epoch": 12.69,
"learning_rate": 4.615233874493134e-06,
"loss": 0.0068,
"step": 842000
},
{
"epoch": 12.7,
"learning_rate": 4.600159785344971e-06,
"loss": 0.0072,
"step": 842500
},
{
"epoch": 12.71,
"learning_rate": 4.585085696196807e-06,
"loss": 0.0067,
"step": 843000
},
{
"epoch": 12.71,
"learning_rate": 4.570011607048645e-06,
"loss": 0.0063,
"step": 843500
},
{
"epoch": 12.72,
"learning_rate": 4.554937517900481e-06,
"loss": 0.0068,
"step": 844000
},
{
"epoch": 12.73,
"learning_rate": 4.539863428752318e-06,
"loss": 0.008,
"step": 844500
},
{
"epoch": 12.74,
"learning_rate": 4.5247893396041546e-06,
"loss": 0.0067,
"step": 845000
},
{
"epoch": 12.75,
"learning_rate": 4.509715250455992e-06,
"loss": 0.0075,
"step": 845500
},
{
"epoch": 12.75,
"learning_rate": 4.4946411613078285e-06,
"loss": 0.0071,
"step": 846000
},
{
"epoch": 12.76,
"learning_rate": 4.479567072159665e-06,
"loss": 0.0072,
"step": 846500
},
{
"epoch": 12.77,
"learning_rate": 4.464492983011502e-06,
"loss": 0.0074,
"step": 847000
},
{
"epoch": 12.78,
"learning_rate": 4.449418893863339e-06,
"loss": 0.0067,
"step": 847500
},
{
"epoch": 12.78,
"learning_rate": 4.434344804715176e-06,
"loss": 0.0069,
"step": 848000
},
{
"epoch": 12.79,
"learning_rate": 4.419270715567012e-06,
"loss": 0.0073,
"step": 848500
},
{
"epoch": 12.8,
"learning_rate": 4.404196626418849e-06,
"loss": 0.0071,
"step": 849000
},
{
"epoch": 12.81,
"learning_rate": 4.389122537270686e-06,
"loss": 0.0073,
"step": 849500
},
{
"epoch": 12.81,
"learning_rate": 4.374048448122523e-06,
"loss": 0.0069,
"step": 850000
},
{
"epoch": 12.82,
"learning_rate": 4.358974358974359e-06,
"loss": 0.0078,
"step": 850500
},
{
"epoch": 12.83,
"learning_rate": 4.343900269826196e-06,
"loss": 0.0066,
"step": 851000
},
{
"epoch": 12.84,
"learning_rate": 4.3288261806780326e-06,
"loss": 0.0074,
"step": 851500
},
{
"epoch": 12.84,
"learning_rate": 4.31375209152987e-06,
"loss": 0.0075,
"step": 852000
},
{
"epoch": 12.85,
"learning_rate": 4.2986780023817066e-06,
"loss": 0.0071,
"step": 852500
},
{
"epoch": 12.86,
"learning_rate": 4.283603913233543e-06,
"loss": 0.0069,
"step": 853000
},
{
"epoch": 12.87,
"learning_rate": 4.26852982408538e-06,
"loss": 0.0065,
"step": 853500
},
{
"epoch": 12.87,
"learning_rate": 4.253455734937217e-06,
"loss": 0.0064,
"step": 854000
},
{
"epoch": 12.88,
"learning_rate": 4.238381645789054e-06,
"loss": 0.0068,
"step": 854500
},
{
"epoch": 12.89,
"learning_rate": 4.22330755664089e-06,
"loss": 0.0069,
"step": 855000
},
{
"epoch": 12.9,
"learning_rate": 4.208233467492727e-06,
"loss": 0.0072,
"step": 855500
},
{
"epoch": 12.9,
"learning_rate": 4.193159378344564e-06,
"loss": 0.0071,
"step": 856000
},
{
"epoch": 12.91,
"learning_rate": 4.178085289196401e-06,
"loss": 0.0069,
"step": 856500
},
{
"epoch": 12.92,
"learning_rate": 4.1630112000482374e-06,
"loss": 0.0076,
"step": 857000
},
{
"epoch": 12.93,
"learning_rate": 4.147937110900074e-06,
"loss": 0.0082,
"step": 857500
},
{
"epoch": 12.93,
"learning_rate": 4.132863021751911e-06,
"loss": 0.0074,
"step": 858000
},
{
"epoch": 12.94,
"learning_rate": 4.117788932603747e-06,
"loss": 0.0079,
"step": 858500
},
{
"epoch": 12.95,
"learning_rate": 4.102714843455584e-06,
"loss": 0.0073,
"step": 859000
},
{
"epoch": 12.96,
"learning_rate": 4.08764075430742e-06,
"loss": 0.0066,
"step": 859500
},
{
"epoch": 12.96,
"learning_rate": 4.072566665159258e-06,
"loss": 0.0074,
"step": 860000
},
{
"epoch": 12.97,
"learning_rate": 4.057492576011094e-06,
"loss": 0.0069,
"step": 860500
},
{
"epoch": 12.98,
"learning_rate": 4.042418486862931e-06,
"loss": 0.0077,
"step": 861000
},
{
"epoch": 12.99,
"learning_rate": 4.0273443977147675e-06,
"loss": 0.0073,
"step": 861500
},
{
"epoch": 12.99,
"learning_rate": 4.012270308566605e-06,
"loss": 0.0076,
"step": 862000
},
{
"epoch": 13.0,
"eval_accuracy": 0.988018272621634,
"eval_f1": 0.9499993983080423,
"eval_loss": 0.07826410979032516,
"eval_precision": 0.9366427791751697,
"eval_recall": 0.9637424616060747,
"eval_runtime": 275.2269,
"eval_samples_per_second": 428.461,
"eval_steps_per_second": 26.782,
"step": 862407
},
{
"epoch": 13.0,
"learning_rate": 3.9971962194184415e-06,
"loss": 0.0065,
"step": 862500
},
{
"epoch": 13.01,
"learning_rate": 3.982122130270278e-06,
"loss": 0.0063,
"step": 863000
},
{
"epoch": 13.02,
"learning_rate": 3.967048041122115e-06,
"loss": 0.0061,
"step": 863500
},
{
"epoch": 13.02,
"learning_rate": 3.951973951973952e-06,
"loss": 0.0066,
"step": 864000
},
{
"epoch": 13.03,
"learning_rate": 3.936899862825789e-06,
"loss": 0.0057,
"step": 864500
},
{
"epoch": 13.04,
"learning_rate": 3.921825773677625e-06,
"loss": 0.0057,
"step": 865000
},
{
"epoch": 13.05,
"learning_rate": 3.906751684529462e-06,
"loss": 0.0069,
"step": 865500
},
{
"epoch": 13.05,
"learning_rate": 3.891677595381299e-06,
"loss": 0.0061,
"step": 866000
},
{
"epoch": 13.06,
"learning_rate": 3.876603506233136e-06,
"loss": 0.0069,
"step": 866500
},
{
"epoch": 13.07,
"learning_rate": 3.861529417084972e-06,
"loss": 0.0063,
"step": 867000
},
{
"epoch": 13.08,
"learning_rate": 3.846455327936809e-06,
"loss": 0.0064,
"step": 867500
},
{
"epoch": 13.08,
"learning_rate": 3.831381238788646e-06,
"loss": 0.0062,
"step": 868000
},
{
"epoch": 13.09,
"learning_rate": 3.816307149640483e-06,
"loss": 0.0064,
"step": 868500
},
{
"epoch": 13.1,
"learning_rate": 3.8012330604923195e-06,
"loss": 0.0056,
"step": 869000
},
{
"epoch": 13.11,
"learning_rate": 3.7861589713441565e-06,
"loss": 0.0063,
"step": 869500
},
{
"epoch": 13.11,
"learning_rate": 3.771084882195993e-06,
"loss": 0.0064,
"step": 870000
},
{
"epoch": 13.12,
"learning_rate": 3.75601079304783e-06,
"loss": 0.0055,
"step": 870500
},
{
"epoch": 13.13,
"learning_rate": 3.740936703899667e-06,
"loss": 0.0061,
"step": 871000
},
{
"epoch": 13.14,
"learning_rate": 3.725862614751504e-06,
"loss": 0.006,
"step": 871500
},
{
"epoch": 13.14,
"learning_rate": 3.7107885256033406e-06,
"loss": 0.0062,
"step": 872000
},
{
"epoch": 13.15,
"learning_rate": 3.6957144364551776e-06,
"loss": 0.0061,
"step": 872500
},
{
"epoch": 13.16,
"learning_rate": 3.680640347307014e-06,
"loss": 0.0065,
"step": 873000
},
{
"epoch": 13.17,
"learning_rate": 3.665566258158851e-06,
"loss": 0.0067,
"step": 873500
},
{
"epoch": 13.17,
"learning_rate": 3.6504921690106878e-06,
"loss": 0.0053,
"step": 874000
},
{
"epoch": 13.18,
"learning_rate": 3.6354180798625248e-06,
"loss": 0.0066,
"step": 874500
},
{
"epoch": 13.19,
"learning_rate": 3.6203439907143613e-06,
"loss": 0.0065,
"step": 875000
},
{
"epoch": 13.2,
"learning_rate": 3.6052699015661983e-06,
"loss": 0.0063,
"step": 875500
},
{
"epoch": 13.2,
"learning_rate": 3.590195812418035e-06,
"loss": 0.0059,
"step": 876000
},
{
"epoch": 13.21,
"learning_rate": 3.5751217232698715e-06,
"loss": 0.0061,
"step": 876500
},
{
"epoch": 13.22,
"learning_rate": 3.560047634121708e-06,
"loss": 0.0064,
"step": 877000
},
{
"epoch": 13.23,
"learning_rate": 3.544973544973545e-06,
"loss": 0.0059,
"step": 877500
},
{
"epoch": 13.24,
"learning_rate": 3.5298994558253816e-06,
"loss": 0.0062,
"step": 878000
},
{
"epoch": 13.24,
"learning_rate": 3.5148253666772186e-06,
"loss": 0.006,
"step": 878500
},
{
"epoch": 13.25,
"learning_rate": 3.499751277529055e-06,
"loss": 0.0062,
"step": 879000
},
{
"epoch": 13.26,
"learning_rate": 3.484677188380892e-06,
"loss": 0.0057,
"step": 879500
},
{
"epoch": 13.27,
"learning_rate": 3.4696030992327288e-06,
"loss": 0.0069,
"step": 880000
},
{
"epoch": 13.27,
"learning_rate": 3.4545290100845658e-06,
"loss": 0.0066,
"step": 880500
},
{
"epoch": 13.28,
"learning_rate": 3.4394549209364023e-06,
"loss": 0.0069,
"step": 881000
},
{
"epoch": 13.29,
"learning_rate": 3.424380831788239e-06,
"loss": 0.0063,
"step": 881500
},
{
"epoch": 13.3,
"learning_rate": 3.409306742640076e-06,
"loss": 0.0062,
"step": 882000
},
{
"epoch": 13.3,
"learning_rate": 3.3942326534919125e-06,
"loss": 0.0065,
"step": 882500
},
{
"epoch": 13.31,
"learning_rate": 3.3791585643437495e-06,
"loss": 0.0061,
"step": 883000
},
{
"epoch": 13.32,
"learning_rate": 3.364084475195586e-06,
"loss": 0.0065,
"step": 883500
},
{
"epoch": 13.33,
"learning_rate": 3.349010386047423e-06,
"loss": 0.0064,
"step": 884000
},
{
"epoch": 13.33,
"learning_rate": 3.3339362968992596e-06,
"loss": 0.006,
"step": 884500
},
{
"epoch": 13.34,
"learning_rate": 3.3188622077510966e-06,
"loss": 0.0058,
"step": 885000
},
{
"epoch": 13.35,
"learning_rate": 3.3037881186029332e-06,
"loss": 0.0061,
"step": 885500
},
{
"epoch": 13.36,
"learning_rate": 3.2887140294547702e-06,
"loss": 0.0057,
"step": 886000
},
{
"epoch": 13.36,
"learning_rate": 3.2736399403066068e-06,
"loss": 0.0063,
"step": 886500
},
{
"epoch": 13.37,
"learning_rate": 3.2585658511584438e-06,
"loss": 0.0065,
"step": 887000
},
{
"epoch": 13.38,
"learning_rate": 3.2434917620102804e-06,
"loss": 0.0059,
"step": 887500
},
{
"epoch": 13.39,
"learning_rate": 3.2284176728621174e-06,
"loss": 0.0054,
"step": 888000
},
{
"epoch": 13.39,
"learning_rate": 3.213343583713954e-06,
"loss": 0.0065,
"step": 888500
},
{
"epoch": 13.4,
"learning_rate": 3.198269494565791e-06,
"loss": 0.0059,
"step": 889000
},
{
"epoch": 13.41,
"learning_rate": 3.1831954054176275e-06,
"loss": 0.0056,
"step": 889500
},
{
"epoch": 13.42,
"learning_rate": 3.1681213162694645e-06,
"loss": 0.0058,
"step": 890000
},
{
"epoch": 13.42,
"learning_rate": 3.153047227121301e-06,
"loss": 0.0061,
"step": 890500
},
{
"epoch": 13.43,
"learning_rate": 3.137973137973138e-06,
"loss": 0.006,
"step": 891000
},
{
"epoch": 13.44,
"learning_rate": 3.1228990488249747e-06,
"loss": 0.0066,
"step": 891500
},
{
"epoch": 13.45,
"learning_rate": 3.1078249596768117e-06,
"loss": 0.0066,
"step": 892000
},
{
"epoch": 13.45,
"learning_rate": 3.0927508705286482e-06,
"loss": 0.0062,
"step": 892500
},
{
"epoch": 13.46,
"learning_rate": 3.0776767813804852e-06,
"loss": 0.0057,
"step": 893000
},
{
"epoch": 13.47,
"learning_rate": 3.062602692232322e-06,
"loss": 0.0068,
"step": 893500
},
{
"epoch": 13.48,
"learning_rate": 3.047528603084159e-06,
"loss": 0.0061,
"step": 894000
},
{
"epoch": 13.48,
"learning_rate": 3.0324545139359954e-06,
"loss": 0.0061,
"step": 894500
},
{
"epoch": 13.49,
"learning_rate": 3.0173804247878324e-06,
"loss": 0.0071,
"step": 895000
},
{
"epoch": 13.5,
"learning_rate": 3.002306335639669e-06,
"loss": 0.0063,
"step": 895500
},
{
"epoch": 13.51,
"learning_rate": 2.987232246491506e-06,
"loss": 0.0063,
"step": 896000
},
{
"epoch": 13.51,
"learning_rate": 2.9721581573433425e-06,
"loss": 0.0063,
"step": 896500
},
{
"epoch": 13.52,
"learning_rate": 2.9570840681951795e-06,
"loss": 0.0065,
"step": 897000
},
{
"epoch": 13.53,
"learning_rate": 2.942009979047016e-06,
"loss": 0.0071,
"step": 897500
},
{
"epoch": 13.54,
"learning_rate": 2.926935889898853e-06,
"loss": 0.0064,
"step": 898000
},
{
"epoch": 13.54,
"learning_rate": 2.9118618007506897e-06,
"loss": 0.0056,
"step": 898500
},
{
"epoch": 13.55,
"learning_rate": 2.8967877116025267e-06,
"loss": 0.0072,
"step": 899000
},
{
"epoch": 13.56,
"learning_rate": 2.8817136224543632e-06,
"loss": 0.0067,
"step": 899500
},
{
"epoch": 13.57,
"learning_rate": 2.8666395333062002e-06,
"loss": 0.0059,
"step": 900000
},
{
"epoch": 13.57,
"learning_rate": 2.851565444158037e-06,
"loss": 0.0062,
"step": 900500
},
{
"epoch": 13.58,
"learning_rate": 2.836491355009874e-06,
"loss": 0.006,
"step": 901000
},
{
"epoch": 13.59,
"learning_rate": 2.8214172658617104e-06,
"loss": 0.0063,
"step": 901500
},
{
"epoch": 13.6,
"learning_rate": 2.8063431767135474e-06,
"loss": 0.006,
"step": 902000
},
{
"epoch": 13.6,
"learning_rate": 2.791269087565384e-06,
"loss": 0.0063,
"step": 902500
},
{
"epoch": 13.61,
"learning_rate": 2.776194998417221e-06,
"loss": 0.0066,
"step": 903000
},
{
"epoch": 13.62,
"learning_rate": 2.7611209092690575e-06,
"loss": 0.007,
"step": 903500
},
{
"epoch": 13.63,
"learning_rate": 2.7460468201208945e-06,
"loss": 0.0063,
"step": 904000
},
{
"epoch": 13.63,
"learning_rate": 2.730972730972731e-06,
"loss": 0.0062,
"step": 904500
},
{
"epoch": 13.64,
"learning_rate": 2.715898641824568e-06,
"loss": 0.0061,
"step": 905000
},
{
"epoch": 13.65,
"learning_rate": 2.7008245526764047e-06,
"loss": 0.0061,
"step": 905500
},
{
"epoch": 13.66,
"learning_rate": 2.6857504635282417e-06,
"loss": 0.0067,
"step": 906000
},
{
"epoch": 13.66,
"learning_rate": 2.6706763743800783e-06,
"loss": 0.0063,
"step": 906500
},
{
"epoch": 13.67,
"learning_rate": 2.6556022852319153e-06,
"loss": 0.0061,
"step": 907000
},
{
"epoch": 13.68,
"learning_rate": 2.640528196083752e-06,
"loss": 0.0064,
"step": 907500
},
{
"epoch": 13.69,
"learning_rate": 2.625454106935589e-06,
"loss": 0.0065,
"step": 908000
},
{
"epoch": 13.69,
"learning_rate": 2.6103800177874254e-06,
"loss": 0.0066,
"step": 908500
},
{
"epoch": 13.7,
"learning_rate": 2.5953059286392624e-06,
"loss": 0.0058,
"step": 909000
},
{
"epoch": 13.71,
"learning_rate": 2.580231839491099e-06,
"loss": 0.0063,
"step": 909500
},
{
"epoch": 13.72,
"learning_rate": 2.5651577503429355e-06,
"loss": 0.0065,
"step": 910000
},
{
"epoch": 13.72,
"learning_rate": 2.5500836611947725e-06,
"loss": 0.0063,
"step": 910500
},
{
"epoch": 13.73,
"learning_rate": 2.535009572046609e-06,
"loss": 0.0064,
"step": 911000
},
{
"epoch": 13.74,
"learning_rate": 2.519935482898446e-06,
"loss": 0.0067,
"step": 911500
},
{
"epoch": 13.75,
"learning_rate": 2.5048613937502827e-06,
"loss": 0.0065,
"step": 912000
},
{
"epoch": 13.76,
"learning_rate": 2.4897873046021197e-06,
"loss": 0.0062,
"step": 912500
},
{
"epoch": 13.76,
"learning_rate": 2.4747132154539563e-06,
"loss": 0.0066,
"step": 913000
},
{
"epoch": 13.77,
"learning_rate": 2.4596391263057933e-06,
"loss": 0.0058,
"step": 913500
},
{
"epoch": 13.78,
"learning_rate": 2.4445650371576294e-06,
"loss": 0.0057,
"step": 914000
},
{
"epoch": 13.79,
"learning_rate": 2.4294909480094664e-06,
"loss": 0.007,
"step": 914500
},
{
"epoch": 13.79,
"learning_rate": 2.414416858861303e-06,
"loss": 0.0058,
"step": 915000
},
{
"epoch": 13.8,
"learning_rate": 2.39934276971314e-06,
"loss": 0.0054,
"step": 915500
},
{
"epoch": 13.81,
"learning_rate": 2.3842686805649766e-06,
"loss": 0.0066,
"step": 916000
},
{
"epoch": 13.82,
"learning_rate": 2.3691945914168136e-06,
"loss": 0.0057,
"step": 916500
},
{
"epoch": 13.82,
"learning_rate": 2.35412050226865e-06,
"loss": 0.0057,
"step": 917000
},
{
"epoch": 13.83,
"learning_rate": 2.339046413120487e-06,
"loss": 0.0062,
"step": 917500
},
{
"epoch": 13.84,
"learning_rate": 2.3239723239723237e-06,
"loss": 0.0061,
"step": 918000
},
{
"epoch": 13.85,
"learning_rate": 2.3088982348241607e-06,
"loss": 0.0063,
"step": 918500
},
{
"epoch": 13.85,
"learning_rate": 2.2938241456759973e-06,
"loss": 0.0061,
"step": 919000
},
{
"epoch": 13.86,
"learning_rate": 2.2787500565278343e-06,
"loss": 0.0062,
"step": 919500
},
{
"epoch": 13.87,
"learning_rate": 2.263675967379671e-06,
"loss": 0.0061,
"step": 920000
},
{
"epoch": 13.88,
"learning_rate": 2.248601878231508e-06,
"loss": 0.0059,
"step": 920500
},
{
"epoch": 13.88,
"learning_rate": 2.2335277890833444e-06,
"loss": 0.0061,
"step": 921000
},
{
"epoch": 13.89,
"learning_rate": 2.2184536999351814e-06,
"loss": 0.006,
"step": 921500
},
{
"epoch": 13.9,
"learning_rate": 2.203379610787018e-06,
"loss": 0.0054,
"step": 922000
},
{
"epoch": 13.91,
"learning_rate": 2.188305521638855e-06,
"loss": 0.0063,
"step": 922500
},
{
"epoch": 13.91,
"learning_rate": 2.1732314324906916e-06,
"loss": 0.0057,
"step": 923000
},
{
"epoch": 13.92,
"learning_rate": 2.1581573433425286e-06,
"loss": 0.0061,
"step": 923500
},
{
"epoch": 13.93,
"learning_rate": 2.143083254194365e-06,
"loss": 0.0061,
"step": 924000
},
{
"epoch": 13.94,
"learning_rate": 2.128009165046202e-06,
"loss": 0.0059,
"step": 924500
},
{
"epoch": 13.94,
"learning_rate": 2.1129350758980387e-06,
"loss": 0.0066,
"step": 925000
},
{
"epoch": 13.95,
"learning_rate": 2.0978609867498757e-06,
"loss": 0.0057,
"step": 925500
},
{
"epoch": 13.96,
"learning_rate": 2.0827868976017123e-06,
"loss": 0.0063,
"step": 926000
},
{
"epoch": 13.97,
"learning_rate": 2.0677128084535493e-06,
"loss": 0.0058,
"step": 926500
},
{
"epoch": 13.97,
"learning_rate": 2.052638719305386e-06,
"loss": 0.0061,
"step": 927000
},
{
"epoch": 13.98,
"learning_rate": 2.037564630157223e-06,
"loss": 0.0063,
"step": 927500
},
{
"epoch": 13.99,
"learning_rate": 2.0224905410090594e-06,
"loss": 0.0064,
"step": 928000
},
{
"epoch": 14.0,
"learning_rate": 2.0074164518608964e-06,
"loss": 0.0054,
"step": 928500
},
{
"epoch": 14.0,
"eval_accuracy": 0.9884409088234384,
"eval_f1": 0.9516207694515415,
"eval_loss": 0.08319615572690964,
"eval_precision": 0.938869505641383,
"eval_recall": 0.9647231649453492,
"eval_runtime": 244.0878,
"eval_samples_per_second": 483.121,
"eval_steps_per_second": 30.198,
"step": 928746
}
],
"max_steps": 995085,
"num_train_epochs": 15,
"total_flos": 3.882929181746528e+18,
"trial_name": null,
"trial_params": null
}