{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.997953488372093,
  "eval_steps": 500,
  "global_step": 2013,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.014883720930232559,
      "grad_norm": 8.902280630176788,
      "learning_rate": 4.950495049504951e-07,
      "loss": 0.8797,
      "step": 10
    },
    {
      "epoch": 0.029767441860465118,
      "grad_norm": 3.3029491558899418,
      "learning_rate": 9.900990099009902e-07,
      "loss": 0.774,
      "step": 20
    },
    {
      "epoch": 0.044651162790697675,
      "grad_norm": 1.9413977734691883,
      "learning_rate": 1.4851485148514852e-06,
      "loss": 0.6921,
      "step": 30
    },
    {
      "epoch": 0.059534883720930236,
      "grad_norm": 1.5937915147958766,
      "learning_rate": 1.9801980198019803e-06,
      "loss": 0.6479,
      "step": 40
    },
    {
      "epoch": 0.07441860465116279,
      "grad_norm": 1.6800049101706125,
      "learning_rate": 2.4752475247524753e-06,
      "loss": 0.6181,
      "step": 50
    },
    {
      "epoch": 0.08930232558139535,
      "grad_norm": 2.31629043986904,
      "learning_rate": 2.9702970297029703e-06,
      "loss": 0.598,
      "step": 60
    },
    {
      "epoch": 0.10418604651162791,
      "grad_norm": 1.813171169001016,
      "learning_rate": 3.4653465346534653e-06,
      "loss": 0.5869,
      "step": 70
    },
    {
      "epoch": 0.11906976744186047,
      "grad_norm": 2.4807755593084577,
      "learning_rate": 3.960396039603961e-06,
      "loss": 0.5731,
      "step": 80
    },
    {
      "epoch": 0.13395348837209303,
      "grad_norm": 2.627244335259517,
      "learning_rate": 4.455445544554456e-06,
      "loss": 0.5715,
      "step": 90
    },
    {
      "epoch": 0.14883720930232558,
      "grad_norm": 2.1854899385391042,
      "learning_rate": 4.950495049504951e-06,
      "loss": 0.5589,
      "step": 100
    },
    {
      "epoch": 0.16372093023255813,
      "grad_norm": 2.1643320968352504,
      "learning_rate": 4.999753989526703e-06,
      "loss": 0.5591,
      "step": 110
    },
    {
      "epoch": 0.1786046511627907,
      "grad_norm": 1.6906833870682114,
      "learning_rate": 4.998903652018798e-06,
      "loss": 0.5473,
      "step": 120
    },
    {
      "epoch": 0.19348837209302325,
      "grad_norm": 1.6954868322135426,
      "learning_rate": 4.997446179820209e-06,
      "loss": 0.5418,
      "step": 130
    },
    {
      "epoch": 0.20837209302325582,
      "grad_norm": 3.1018130514636666,
      "learning_rate": 4.995381966403521e-06,
      "loss": 0.5392,
      "step": 140
    },
    {
      "epoch": 0.22325581395348837,
      "grad_norm": 2.249001753991247,
      "learning_rate": 4.9927115690427536e-06,
      "loss": 0.538,
      "step": 150
    },
    {
      "epoch": 0.23813953488372094,
      "grad_norm": 2.5823816443281173,
      "learning_rate": 4.989435708662909e-06,
      "loss": 0.5337,
      "step": 160
    },
    {
      "epoch": 0.25302325581395346,
      "grad_norm": 2.271030426623902,
      "learning_rate": 4.985555269645351e-06,
      "loss": 0.5236,
      "step": 170
    },
    {
      "epoch": 0.26790697674418606,
      "grad_norm": 2.318489677318734,
      "learning_rate": 4.981071299589047e-06,
      "loss": 0.5245,
      "step": 180
    },
    {
      "epoch": 0.2827906976744186,
      "grad_norm": 1.9206844645280905,
      "learning_rate": 4.975985009027748e-06,
      "loss": 0.5218,
      "step": 190
    },
    {
      "epoch": 0.29767441860465116,
      "grad_norm": 2.9110235751406983,
      "learning_rate": 4.970297771103183e-06,
      "loss": 0.5225,
      "step": 200
    },
    {
      "epoch": 0.3125581395348837,
      "grad_norm": 2.4648347140615376,
      "learning_rate": 4.964011121194349e-06,
      "loss": 0.5125,
      "step": 210
    },
    {
      "epoch": 0.32744186046511625,
      "grad_norm": 1.8762938609852826,
      "learning_rate": 4.957126756503014e-06,
      "loss": 0.5119,
      "step": 220
    },
    {
      "epoch": 0.34232558139534885,
      "grad_norm": 2.634519062843505,
      "learning_rate": 4.949646535595514e-06,
      "loss": 0.5089,
      "step": 230
    },
    {
      "epoch": 0.3572093023255814,
      "grad_norm": 2.7133170705876712,
      "learning_rate": 4.941572477901008e-06,
      "loss": 0.5028,
      "step": 240
    },
    {
      "epoch": 0.37209302325581395,
      "grad_norm": 1.772078056634877,
      "learning_rate": 4.932906763166286e-06,
      "loss": 0.5036,
      "step": 250
    },
    {
      "epoch": 0.3869767441860465,
      "grad_norm": 2.3772772619843856,
      "learning_rate": 4.9236517308673135e-06,
      "loss": 0.5051,
      "step": 260
    },
    {
      "epoch": 0.4018604651162791,
      "grad_norm": 1.7586683386687694,
      "learning_rate": 4.9138098795776335e-06,
      "loss": 0.4996,
      "step": 270
    },
    {
      "epoch": 0.41674418604651164,
      "grad_norm": 2.441721431571048,
      "learning_rate": 4.903383866293839e-06,
      "loss": 0.5003,
      "step": 280
    },
    {
      "epoch": 0.4316279069767442,
      "grad_norm": 1.6335890264070398,
      "learning_rate": 4.89237650571826e-06,
      "loss": 0.5009,
      "step": 290
    },
    {
      "epoch": 0.44651162790697674,
      "grad_norm": 1.8052385644508027,
      "learning_rate": 4.880790769499083e-06,
      "loss": 0.4983,
      "step": 300
    },
    {
      "epoch": 0.4613953488372093,
      "grad_norm": 1.800037297627897,
      "learning_rate": 4.868629785428096e-06,
      "loss": 0.497,
      "step": 310
    },
    {
      "epoch": 0.4762790697674419,
      "grad_norm": 1.605966469734369,
      "learning_rate": 4.855896836596282e-06,
      "loss": 0.4929,
      "step": 320
    },
    {
      "epoch": 0.49116279069767443,
      "grad_norm": 1.994957648259566,
      "learning_rate": 4.842595360507486e-06,
      "loss": 0.4966,
      "step": 330
    },
    {
      "epoch": 0.5060465116279069,
      "grad_norm": 1.7903983897538154,
      "learning_rate": 4.828728948150395e-06,
      "loss": 0.4948,
      "step": 340
    },
    {
      "epoch": 0.5209302325581395,
      "grad_norm": 1.5566561390721225,
      "learning_rate": 4.8143013430290805e-06,
      "loss": 0.4918,
      "step": 350
    },
    {
      "epoch": 0.5358139534883721,
      "grad_norm": 1.449953579251504,
      "learning_rate": 4.799316440152367e-06,
      "loss": 0.4899,
      "step": 360
    },
    {
      "epoch": 0.5506976744186046,
      "grad_norm": 1.945851570880214,
      "learning_rate": 4.783778284982303e-06,
      "loss": 0.4859,
      "step": 370
    },
    {
      "epoch": 0.5655813953488372,
      "grad_norm": 1.6223980803827658,
      "learning_rate": 4.767691072342006e-06,
      "loss": 0.4884,
      "step": 380
    },
    {
      "epoch": 0.5804651162790697,
      "grad_norm": 1.8375917251155103,
      "learning_rate": 4.7510591452831975e-06,
      "loss": 0.4809,
      "step": 390
    },
    {
      "epoch": 0.5953488372093023,
      "grad_norm": 1.742322917798526,
      "learning_rate": 4.733886993913704e-06,
      "loss": 0.4857,
      "step": 400
    },
    {
      "epoch": 0.6102325581395349,
      "grad_norm": 1.6356424508667176,
      "learning_rate": 4.7161792541852675e-06,
      "loss": 0.4872,
      "step": 410
    },
    {
      "epoch": 0.6251162790697674,
      "grad_norm": 1.5526421294000088,
      "learning_rate": 4.69794070664199e-06,
      "loss": 0.4829,
      "step": 420
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.4232942412821412,
      "learning_rate": 4.6791762751297236e-06,
      "loss": 0.481,
      "step": 430
    },
    {
      "epoch": 0.6548837209302325,
      "grad_norm": 1.483668935893611,
      "learning_rate": 4.65989102546679e-06,
      "loss": 0.4821,
      "step": 440
    },
    {
      "epoch": 0.6697674418604651,
      "grad_norm": 2.1539928952738405,
      "learning_rate": 4.640090164076361e-06,
      "loss": 0.4749,
      "step": 450
    },
    {
      "epoch": 0.6846511627906977,
      "grad_norm": 2.1025050973956767,
      "learning_rate": 4.61977903658089e-06,
      "loss": 0.4804,
      "step": 460
    },
    {
      "epoch": 0.6995348837209302,
      "grad_norm": 1.7519293478931726,
      "learning_rate": 4.5989631263589546e-06,
      "loss": 0.4743,
      "step": 470
    },
    {
      "epoch": 0.7144186046511628,
      "grad_norm": 2.0423876723189243,
      "learning_rate": 4.5776480530649155e-06,
      "loss": 0.4726,
      "step": 480
    },
    {
      "epoch": 0.7293023255813953,
      "grad_norm": 1.771995353632526,
      "learning_rate": 4.555839571111782e-06,
      "loss": 0.4728,
      "step": 490
    },
    {
      "epoch": 0.7441860465116279,
      "grad_norm": 1.7344799986510258,
      "learning_rate": 4.533543568117697e-06,
      "loss": 0.4725,
      "step": 500
    },
    {
      "epoch": 0.7590697674418605,
      "grad_norm": 1.3702958835212744,
      "learning_rate": 4.5107660633164645e-06,
      "loss": 0.475,
      "step": 510
    },
    {
      "epoch": 0.773953488372093,
      "grad_norm": 1.5331038199927272,
      "learning_rate": 4.487513205932537e-06,
      "loss": 0.4758,
      "step": 520
    },
    {
      "epoch": 0.7888372093023256,
      "grad_norm": 1.4415751132180146,
      "learning_rate": 4.46379127352092e-06,
      "loss": 0.4651,
      "step": 530
    },
    {
      "epoch": 0.8037209302325582,
      "grad_norm": 1.5387474607067084,
      "learning_rate": 4.439606670272421e-06,
      "loss": 0.4717,
      "step": 540
    },
    {
      "epoch": 0.8186046511627907,
      "grad_norm": 1.3930637138971305,
      "learning_rate": 4.414965925284719e-06,
      "loss": 0.4683,
      "step": 550
    },
    {
      "epoch": 0.8334883720930233,
      "grad_norm": 1.4357407382473695,
      "learning_rate": 4.389875690799706e-06,
      "loss": 0.4705,
      "step": 560
    },
    {
      "epoch": 0.8483720930232558,
      "grad_norm": 1.867160837622047,
      "learning_rate": 4.364342740407589e-06,
      "loss": 0.4684,
      "step": 570
    },
    {
      "epoch": 0.8632558139534884,
      "grad_norm": 2.079586326625279,
      "learning_rate": 4.338373967218229e-06,
      "loss": 0.4629,
      "step": 580
    },
    {
      "epoch": 0.878139534883721,
      "grad_norm": 1.6049505438520633,
      "learning_rate": 4.3119763820002105e-06,
      "loss": 0.4643,
      "step": 590
    },
    {
      "epoch": 0.8930232558139535,
      "grad_norm": 1.2874061526414027,
      "learning_rate": 4.285157111288156e-06,
      "loss": 0.4642,
      "step": 600
    },
    {
      "epoch": 0.9079069767441861,
      "grad_norm": 1.545108445973155,
      "learning_rate": 4.257923395458778e-06,
      "loss": 0.4606,
      "step": 610
    },
    {
      "epoch": 0.9227906976744186,
      "grad_norm": 1.6451186302266136,
      "learning_rate": 4.230282586776198e-06,
      "loss": 0.4584,
      "step": 620
    },
    {
      "epoch": 0.9376744186046512,
      "grad_norm": 2.1325124535031263,
      "learning_rate": 4.202242147407065e-06,
      "loss": 0.4621,
      "step": 630
    },
    {
      "epoch": 0.9525581395348838,
      "grad_norm": 2.0011178228576862,
      "learning_rate": 4.173809647406001e-06,
      "loss": 0.4601,
      "step": 640
    },
    {
      "epoch": 0.9674418604651163,
      "grad_norm": 1.5742244911614134,
      "learning_rate": 4.1449927626719164e-06,
      "loss": 0.456,
      "step": 650
    },
    {
      "epoch": 0.9823255813953489,
      "grad_norm": 1.7386928439484917,
      "learning_rate": 4.115799272875756e-06,
      "loss": 0.4548,
      "step": 660
    },
    {
      "epoch": 0.9972093023255814,
      "grad_norm": 1.5311577866040798,
      "learning_rate": 4.086237059360228e-06,
      "loss": 0.4624,
      "step": 670
    },
    {
      "epoch": 0.9986976744186047,
      "eval_loss": 0.05731714889407158,
      "eval_runtime": 455.161,
      "eval_samples_per_second": 39.773,
      "eval_steps_per_second": 0.622,
      "step": 671
    },
    {
      "epoch": 1.0130232558139536,
      "grad_norm": 1.9657672125173562,
      "learning_rate": 4.056314103012081e-06,
      "loss": 0.3827,
      "step": 680
    },
    {
      "epoch": 1.027906976744186,
      "grad_norm": 1.6977560049208136,
      "learning_rate": 4.026038482107515e-06,
      "loss": 0.3642,
      "step": 690
    },
    {
      "epoch": 1.0427906976744186,
      "grad_norm": 1.7125394386865773,
      "learning_rate": 3.995418370131294e-06,
      "loss": 0.3649,
      "step": 700
    },
    {
      "epoch": 1.0576744186046512,
      "grad_norm": 1.8561895728990827,
      "learning_rate": 3.964462033570154e-06,
      "loss": 0.3662,
      "step": 710
    },
    {
      "epoch": 1.0725581395348838,
      "grad_norm": 1.5618111186332968,
      "learning_rate": 3.9331778296811126e-06,
      "loss": 0.3658,
      "step": 720
    },
    {
      "epoch": 1.0874418604651164,
      "grad_norm": 1.6589684668519427,
      "learning_rate": 3.9015742042352575e-06,
      "loss": 0.3633,
      "step": 730
    },
    {
      "epoch": 1.1023255813953488,
      "grad_norm": 1.6737692269771278,
      "learning_rate": 3.8696596892376615e-06,
      "loss": 0.3683,
      "step": 740
    },
    {
      "epoch": 1.1172093023255814,
      "grad_norm": 1.9826343984663213,
      "learning_rate": 3.8374429006239915e-06,
      "loss": 0.366,
      "step": 750
    },
    {
      "epoch": 1.132093023255814,
      "grad_norm": 1.7884778638246734,
      "learning_rate": 3.8049325359344804e-06,
      "loss": 0.3692,
      "step": 760
    },
    {
      "epoch": 1.1469767441860466,
      "grad_norm": 2.611566598798586,
      "learning_rate": 3.7721373719658526e-06,
      "loss": 0.3712,
      "step": 770
    },
    {
      "epoch": 1.1618604651162792,
      "grad_norm": 1.7457709366563403,
      "learning_rate": 3.7390662624018648e-06,
      "loss": 0.3693,
      "step": 780
    },
    {
      "epoch": 1.1767441860465115,
      "grad_norm": 1.9569418242995398,
      "learning_rate": 3.7057281354230794e-06,
      "loss": 0.3653,
      "step": 790
    },
    {
      "epoch": 1.1916279069767441,
      "grad_norm": 2.173634628655407,
      "learning_rate": 3.6721319912965366e-06,
      "loss": 0.3649,
      "step": 800
    },
    {
      "epoch": 1.2065116279069767,
      "grad_norm": 2.2801578585083324,
      "learning_rate": 3.6382868999459524e-06,
      "loss": 0.3685,
      "step": 810
    },
    {
      "epoch": 1.2213953488372093,
      "grad_norm": 2.3692062383608197,
      "learning_rate": 3.6042019985031244e-06,
      "loss": 0.3712,
      "step": 820
    },
    {
      "epoch": 1.236279069767442,
      "grad_norm": 2.3703941397431945,
      "learning_rate": 3.569886488841187e-06,
      "loss": 0.3659,
      "step": 830
    },
    {
      "epoch": 1.2511627906976743,
      "grad_norm": 1.8705318246188423,
      "learning_rate": 3.535349635090386e-06,
      "loss": 0.3682,
      "step": 840
    },
    {
      "epoch": 1.266046511627907,
      "grad_norm": 1.404469365060906,
      "learning_rate": 3.5006007611370513e-06,
      "loss": 0.3662,
      "step": 850
    },
    {
      "epoch": 1.2809302325581395,
      "grad_norm": 1.797601704761619,
      "learning_rate": 3.465649248106435e-06,
      "loss": 0.3661,
      "step": 860
    },
    {
      "epoch": 1.2958139534883721,
      "grad_norm": 1.8585619220995928,
      "learning_rate": 3.4305045318300974e-06,
      "loss": 0.3647,
      "step": 870
    },
    {
      "epoch": 1.3106976744186047,
      "grad_norm": 1.4765933716397583,
      "learning_rate": 3.3951761002985184e-06,
      "loss": 0.3673,
      "step": 880
    },
    {
      "epoch": 1.3255813953488373,
      "grad_norm": 1.4125161732202964,
      "learning_rate": 3.3596734910996397e-06,
      "loss": 0.3642,
      "step": 890
    },
    {
      "epoch": 1.3404651162790697,
      "grad_norm": 1.4190852276081691,
      "learning_rate": 3.3240062888440046e-06,
      "loss": 0.3688,
      "step": 900
    },
    {
      "epoch": 1.3553488372093023,
      "grad_norm": 1.7022894617337727,
      "learning_rate": 3.2881841225772097e-06,
      "loss": 0.3697,
      "step": 910
    },
    {
      "epoch": 1.370232558139535,
      "grad_norm": 1.4899520481456272,
      "learning_rate": 3.2522166631803616e-06,
      "loss": 0.371,
      "step": 920
    },
    {
      "epoch": 1.3851162790697673,
      "grad_norm": 1.5320087948001035,
      "learning_rate": 3.2161136207592323e-06,
      "loss": 0.3661,
      "step": 930
    },
    {
      "epoch": 1.4,
      "grad_norm": 1.5107565046345355,
      "learning_rate": 3.1798847420228358e-06,
      "loss": 0.3716,
      "step": 940
    },
    {
      "epoch": 1.4148837209302325,
      "grad_norm": 1.4964498805556543,
      "learning_rate": 3.14353980765211e-06,
      "loss": 0.3681,
      "step": 950
    },
    {
      "epoch": 1.4297674418604651,
      "grad_norm": 1.4602567799649149,
      "learning_rate": 3.1070886296594427e-06,
      "loss": 0.367,
      "step": 960
    },
    {
      "epoch": 1.4446511627906977,
      "grad_norm": 1.5151588768049005,
      "learning_rate": 3.0705410487397214e-06,
      "loss": 0.3634,
      "step": 970
    },
    {
      "epoch": 1.4595348837209303,
      "grad_norm": 1.4429762741417749,
      "learning_rate": 3.0339069316136573e-06,
      "loss": 0.3692,
      "step": 980
    },
    {
      "epoch": 1.474418604651163,
      "grad_norm": 1.795013045267996,
      "learning_rate": 2.9971961683640683e-06,
      "loss": 0.3677,
      "step": 990
    },
    {
      "epoch": 1.4893023255813953,
      "grad_norm": 1.772230394205441,
      "learning_rate": 2.9604186697658642e-06,
      "loss": 0.3653,
      "step": 1000
    },
    {
      "epoch": 1.504186046511628,
      "grad_norm": 1.517966913164345,
      "learning_rate": 2.923584364610444e-06,
      "loss": 0.3674,
      "step": 1010
    },
    {
      "epoch": 1.5190697674418605,
      "grad_norm": 1.3320746245185962,
      "learning_rate": 2.8867031970252262e-06,
      "loss": 0.3654,
      "step": 1020
    },
    {
      "epoch": 1.5339534883720929,
      "grad_norm": 1.4422364510340309,
      "learning_rate": 2.84978512378904e-06,
      "loss": 0.3649,
      "step": 1030
    },
    {
      "epoch": 1.5488372093023255,
      "grad_norm": 1.422279942394409,
      "learning_rate": 2.8128401116441058e-06,
      "loss": 0.3637,
      "step": 1040
    },
    {
      "epoch": 1.563720930232558,
      "grad_norm": 1.325658675407642,
      "learning_rate": 2.7758781346053165e-06,
      "loss": 0.365,
      "step": 1050
    },
    {
      "epoch": 1.5786046511627907,
      "grad_norm": 1.3384490232214399,
      "learning_rate": 2.738909171267566e-06,
      "loss": 0.3673,
      "step": 1060
    },
    {
      "epoch": 1.5934883720930233,
      "grad_norm": 1.4369227508461269,
      "learning_rate": 2.7019432021118314e-06,
      "loss": 0.3667,
      "step": 1070
    },
    {
      "epoch": 1.608372093023256,
      "grad_norm": 1.550458179933586,
      "learning_rate": 2.664990206810755e-06,
      "loss": 0.3609,
      "step": 1080
    },
    {
      "epoch": 1.6232558139534885,
      "grad_norm": 1.4665966921755653,
      "learning_rate": 2.628060161534437e-06,
      "loss": 0.3677,
      "step": 1090
    },
    {
      "epoch": 1.6381395348837209,
      "grad_norm": 1.4478720169649395,
      "learning_rate": 2.5911630362571787e-06,
      "loss": 0.3663,
      "step": 1100
    },
    {
      "epoch": 1.6530232558139535,
      "grad_norm": 1.427086254069164,
      "learning_rate": 2.5543087920658945e-06,
      "loss": 0.3639,
      "step": 1110
    },
    {
      "epoch": 1.667906976744186,
      "grad_norm": 1.354522173953887,
      "learning_rate": 2.517507378470929e-06,
      "loss": 0.3611,
      "step": 1120
    },
    {
      "epoch": 1.6827906976744185,
      "grad_norm": 1.3693977150384091,
      "learning_rate": 2.480768730719992e-06,
      "loss": 0.3652,
      "step": 1130
    },
    {
      "epoch": 1.697674418604651,
      "grad_norm": 1.2855988884013045,
      "learning_rate": 2.4441027671159503e-06,
      "loss": 0.3639,
      "step": 1140
    },
    {
      "epoch": 1.7125581395348837,
      "grad_norm": 1.403080844114371,
      "learning_rate": 2.4075193863391906e-06,
      "loss": 0.3647,
      "step": 1150
    },
    {
      "epoch": 1.7274418604651163,
      "grad_norm": 1.4103414505961822,
      "learning_rate": 2.3710284647752805e-06,
      "loss": 0.3656,
      "step": 1160
    },
    {
      "epoch": 1.7423255813953489,
      "grad_norm": 1.328279125442394,
      "learning_rate": 2.3346398538486488e-06,
      "loss": 0.3601,
      "step": 1170
    },
    {
      "epoch": 1.7572093023255815,
      "grad_norm": 1.4237569279155826,
      "learning_rate": 2.2983633773630056e-06,
      "loss": 0.3648,
      "step": 1180
    },
    {
      "epoch": 1.772093023255814,
      "grad_norm": 1.3716090849409592,
      "learning_rate": 2.2622088288492166e-06,
      "loss": 0.3608,
      "step": 1190
    },
    {
      "epoch": 1.7869767441860465,
      "grad_norm": 1.3654603526464721,
      "learning_rate": 2.2261859689213523e-06,
      "loss": 0.3597,
      "step": 1200
    },
    {
      "epoch": 1.801860465116279,
      "grad_norm": 1.2691881731891166,
      "learning_rate": 2.1903045226416216e-06,
      "loss": 0.362,
      "step": 1210
    },
    {
      "epoch": 1.8167441860465117,
      "grad_norm": 1.3157985277229747,
      "learning_rate": 2.1545741768949085e-06,
      "loss": 0.3611,
      "step": 1220
    },
    {
      "epoch": 1.831627906976744,
      "grad_norm": 1.3583216702788425,
      "learning_rate": 2.1190045777736057e-06,
      "loss": 0.3613,
      "step": 1230
    },
    {
      "epoch": 1.8465116279069766,
      "grad_norm": 1.2745346937557744,
      "learning_rate": 2.0836053279734723e-06,
      "loss": 0.3569,
      "step": 1240
    },
    {
      "epoch": 1.8613953488372093,
      "grad_norm": 1.3036190956031755,
      "learning_rate": 2.0483859842011976e-06,
      "loss": 0.3597,
      "step": 1250
    },
    {
      "epoch": 1.8762790697674419,
      "grad_norm": 1.4119443086776595,
      "learning_rate": 2.0133560545943902e-06,
      "loss": 0.3636,
      "step": 1260
    },
    {
      "epoch": 1.8911627906976745,
      "grad_norm": 1.2897404474298366,
      "learning_rate": 1.9785249961546668e-06,
      "loss": 0.3575,
      "step": 1270
    },
    {
      "epoch": 1.906046511627907,
      "grad_norm": 1.3639990026448434,
      "learning_rate": 1.94390221219456e-06,
      "loss": 0.3592,
      "step": 1280
    },
    {
      "epoch": 1.9209302325581397,
      "grad_norm": 1.2546167591452697,
      "learning_rate": 1.909497049798906e-06,
      "loss": 0.3584,
      "step": 1290
    },
    {
      "epoch": 1.935813953488372,
      "grad_norm": 1.3438380702724106,
      "learning_rate": 1.8753187973014302e-06,
      "loss": 0.3623,
      "step": 1300
    },
    {
      "epoch": 1.9506976744186046,
      "grad_norm": 1.3452056759551596,
      "learning_rate": 1.8413766817771716e-06,
      "loss": 0.3597,
      "step": 1310
    },
    {
      "epoch": 1.9655813953488372,
      "grad_norm": 1.3834284182053571,
      "learning_rate": 1.8076798665514672e-06,
      "loss": 0.3586,
      "step": 1320
    },
    {
      "epoch": 1.9804651162790696,
      "grad_norm": 1.275093148045728,
      "learning_rate": 1.7742374487261275e-06,
      "loss": 0.3556,
      "step": 1330
    },
    {
      "epoch": 1.9953488372093022,
      "grad_norm": 1.3336419955281094,
      "learning_rate": 1.7410584567235063e-06,
      "loss": 0.3593,
      "step": 1340
    },
    {
      "epoch": 1.9983255813953489,
      "eval_loss": 0.05562544986605644,
      "eval_runtime": 454.3217,
      "eval_samples_per_second": 39.846,
      "eval_steps_per_second": 0.623,
      "step": 1342
    },
    {
      "epoch": 2.0111627906976746,
      "grad_norm": 1.8801844172458124,
      "learning_rate": 1.7081518478491024e-06,
      "loss": 0.2974,
      "step": 1350
    },
    {
      "epoch": 2.026046511627907,
      "grad_norm": 1.5466963455442697,
      "learning_rate": 1.6755265058733625e-06,
      "loss": 0.2705,
      "step": 1360
    },
    {
      "epoch": 2.0409302325581393,
      "grad_norm": 1.6491531999787248,
      "learning_rate": 1.6431912386333337e-06,
      "loss": 0.2715,
      "step": 1370
    },
    {
      "epoch": 2.055813953488372,
      "grad_norm": 1.3549718677985036,
      "learning_rate": 1.61115477565483e-06,
      "loss": 0.2711,
      "step": 1380
    },
    {
      "epoch": 2.0706976744186045,
      "grad_norm": 1.4210689732513506,
      "learning_rate": 1.5794257657957149e-06,
      "loss": 0.2678,
      "step": 1390
    },
    {
      "epoch": 2.085581395348837,
      "grad_norm": 1.5199527949561047,
      "learning_rate": 1.5480127749109867e-06,
      "loss": 0.2715,
      "step": 1400
    },
    {
      "epoch": 2.1004651162790697,
      "grad_norm": 1.3921480720538892,
      "learning_rate": 1.516924283540257e-06,
      "loss": 0.268,
      "step": 1410
    },
    {
      "epoch": 2.1153488372093023,
      "grad_norm": 1.5217951800344998,
      "learning_rate": 1.486168684618268e-06,
      "loss": 0.2666,
      "step": 1420
    },
    {
      "epoch": 2.130232558139535,
      "grad_norm": 1.4410321423758377,
      "learning_rate": 1.4557542812090574e-06,
      "loss": 0.2698,
      "step": 1430
    },
    {
      "epoch": 2.1451162790697675,
      "grad_norm": 1.4576711215393965,
      "learning_rate": 1.4256892842643893e-06,
      "loss": 0.2675,
      "step": 1440
    },
    {
      "epoch": 2.16,
      "grad_norm": 1.40927027905477,
      "learning_rate": 1.3959818104070452e-06,
      "loss": 0.2687,
      "step": 1450
    },
    {
      "epoch": 2.1748837209302327,
      "grad_norm": 1.4616727679236827,
      "learning_rate": 1.3666398797395948e-06,
      "loss": 0.2707,
      "step": 1460
    },
    {
      "epoch": 2.1897674418604653,
      "grad_norm": 1.3795657326148565,
      "learning_rate": 1.3376714136792034e-06,
      "loss": 0.2665,
      "step": 1470
    },
    {
      "epoch": 2.2046511627906975,
      "grad_norm": 1.5330397219497653,
      "learning_rate": 1.3090842328191053e-06,
      "loss": 0.2699,
      "step": 1480
    },
    {
      "epoch": 2.21953488372093,
      "grad_norm": 1.4083642956808657,
      "learning_rate": 1.280886054817277e-06,
      "loss": 0.2709,
      "step": 1490
    },
    {
      "epoch": 2.2344186046511627,
      "grad_norm": 1.4908783738210238,
      "learning_rate": 1.2530844923129096e-06,
      "loss": 0.2712,
      "step": 1500
    },
    {
      "epoch": 2.2493023255813953,
      "grad_norm": 1.3954198116255683,
      "learning_rate": 1.225687050871231e-06,
      "loss": 0.2705,
      "step": 1510
    },
    {
      "epoch": 2.264186046511628,
      "grad_norm": 1.38515024685356,
      "learning_rate": 1.1987011269572357e-06,
      "loss": 0.2701,
      "step": 1520
    },
    {
      "epoch": 2.2790697674418605,
      "grad_norm": 1.3876830410349839,
      "learning_rate": 1.1721340059388617e-06,
      "loss": 0.2672,
      "step": 1530
    },
    {
      "epoch": 2.293953488372093,
      "grad_norm": 1.5349649821163696,
      "learning_rate": 1.1459928601201756e-06,
      "loss": 0.2696,
      "step": 1540
    },
    {
      "epoch": 2.3088372093023257,
      "grad_norm": 1.6222404726063406,
      "learning_rate": 1.1202847468050597e-06,
      "loss": 0.2689,
      "step": 1550
    },
    {
      "epoch": 2.3237209302325583,
      "grad_norm": 1.4018802032895803,
      "learning_rate": 1.0950166063919694e-06,
      "loss": 0.2701,
      "step": 1560
    },
    {
      "epoch": 2.3386046511627905,
      "grad_norm": 1.3526044361279066,
      "learning_rate": 1.0701952605002275e-06,
      "loss": 0.2676,
      "step": 1570
    },
    {
      "epoch": 2.353488372093023,
      "grad_norm": 1.4442038534402912,
      "learning_rate": 1.045827410128407e-06,
      "loss": 0.2661,
      "step": 1580
    },
    {
      "epoch": 2.3683720930232557,
      "grad_norm": 1.4163720971352327,
      "learning_rate": 1.0219196338452623e-06,
      "loss": 0.2689,
      "step": 1590
    },
    {
      "epoch": 2.3832558139534883,
      "grad_norm": 1.356177063119025,
      "learning_rate": 9.984783860137213e-07,
      "loss": 0.2676,
      "step": 1600
    },
    {
      "epoch": 2.398139534883721,
      "grad_norm": 1.3502869776338615,
      "learning_rate": 9.75509995048404e-07,
      "loss": 0.2681,
      "step": 1610
    },
    {
      "epoch": 2.4130232558139535,
      "grad_norm": 1.337863106664254,
      "learning_rate": 9.53020661707148e-07,
      "loss": 0.2695,
      "step": 1620
    },
    {
      "epoch": 2.427906976744186,
      "grad_norm": 1.3624966178122666,
      "learning_rate": 9.310164574169911e-07,
      "loss": 0.2661,
      "step": 1630
    },
    {
      "epoch": 2.4427906976744187,
      "grad_norm": 1.4427762559619615,
      "learning_rate": 9.095033226350787e-07,
      "loss": 0.2682,
      "step": 1640
    },
    {
      "epoch": 2.4576744186046513,
      "grad_norm": 1.3824477379757505,
      "learning_rate": 8.884870652449176e-07,
      "loss": 0.2683,
      "step": 1650
    },
    {
      "epoch": 2.472558139534884,
      "grad_norm": 1.3968898578295752,
      "learning_rate": 8.679733589884308e-07,
      "loss": 0.2676,
      "step": 1660
    },
    {
      "epoch": 2.4874418604651165,
      "grad_norm": 1.403396644885307,
      "learning_rate": 8.479677419342195e-07,
      "loss": 0.2675,
      "step": 1670
    },
    {
      "epoch": 2.5023255813953487,
      "grad_norm": 1.4635100367305935,
      "learning_rate": 8.284756149824561e-07,
      "loss": 0.2691,
      "step": 1680
    },
    {
      "epoch": 2.5172093023255813,
      "grad_norm": 1.4243622651693753,
      "learning_rate": 8.095022404068078e-07,
      "loss": 0.269,
      "step": 1690
    },
    {
      "epoch": 2.532093023255814,
      "grad_norm": 1.379314761648038,
      "learning_rate": 7.910527404337846e-07,
      "loss": 0.2687,
      "step": 1700
    },
    {
      "epoch": 2.5469767441860465,
      "grad_norm": 1.3800285755689508,
      "learning_rate": 7.731320958598944e-07,
      "loss": 0.2687,
      "step": 1710
    },
    {
      "epoch": 2.561860465116279,
      "grad_norm": 1.4168008157356147,
      "learning_rate": 7.557451447069862e-07,
      "loss": 0.2686,
      "step": 1720
    },
    {
      "epoch": 2.5767441860465117,
      "grad_norm": 1.405042195482243,
      "learning_rate": 7.388965809161264e-07,
      "loss": 0.273,
      "step": 1730
    },
    {
      "epoch": 2.5916279069767443,
      "grad_norm": 1.339874655650546,
      "learning_rate": 7.225909530803849e-07,
      "loss": 0.2702,
      "step": 1740
    },
    {
      "epoch": 2.606511627906977,
      "grad_norm": 1.376936805811691,
      "learning_rate": 7.068326632168529e-07,
      "loss": 0.2682,
      "step": 1750
    },
    {
      "epoch": 2.6213953488372095,
      "grad_norm": 1.370179020476883,
      "learning_rate": 6.91625965578234e-07,
      "loss": 0.268,
      "step": 1760
    },
    {
      "epoch": 2.6362790697674416,
      "grad_norm": 1.3701048711478194,
      "learning_rate": 6.769749655043278e-07,
      "loss": 0.2678,
      "step": 1770
    },
    {
      "epoch": 2.6511627906976747,
      "grad_norm": 1.4206830049678218,
      "learning_rate": 6.628836183137136e-07,
      "loss": 0.2701,
      "step": 1780
    },
    {
      "epoch": 2.666046511627907,
      "grad_norm": 1.393374140302477,
      "learning_rate": 6.493557282359362e-07,
      "loss": 0.2687,
      "step": 1790
    },
    {
      "epoch": 2.6809302325581394,
      "grad_norm": 1.461263759783157,
      "learning_rate": 6.363949473844831e-07,
      "loss": 0.268,
      "step": 1800
    },
    {
      "epoch": 2.695813953488372,
      "grad_norm": 1.4734006960029564,
      "learning_rate": 6.240047747708234e-07,
      "loss": 0.2677,
      "step": 1810
    },
    {
      "epoch": 2.7106976744186047,
      "grad_norm": 1.3366661643587023,
      "learning_rate": 6.121885553597864e-07,
      "loss": 0.2681,
      "step": 1820
    },
    {
      "epoch": 2.7255813953488373,
      "grad_norm": 1.422096088237659,
      "learning_rate": 6.009494791665193e-07,
      "loss": 0.2696,
      "step": 1830
    },
    {
      "epoch": 2.74046511627907,
      "grad_norm": 1.34042220211478,
      "learning_rate": 5.902905803952853e-07,
      "loss": 0.2707,
      "step": 1840
    },
    {
      "epoch": 2.7553488372093025,
      "grad_norm": 1.43033974764486,
      "learning_rate": 5.802147366203209e-07,
      "loss": 0.2682,
      "step": 1850
    },
    {
      "epoch": 2.7702325581395346,
      "grad_norm": 1.3377632425473605,
      "learning_rate": 5.707246680089786e-07,
      "loss": 0.2682,
      "step": 1860
    },
    {
      "epoch": 2.7851162790697677,
      "grad_norm": 1.3552116715763636,
      "learning_rate": 5.618229365873664e-07,
      "loss": 0.2679,
      "step": 1870
    },
    {
      "epoch": 2.8,
      "grad_norm": 1.407612240704182,
      "learning_rate": 5.535119455486798e-07,
      "loss": 0.2671,
      "step": 1880
    },
    {
      "epoch": 2.8148837209302324,
      "grad_norm": 1.3307680080669904,
      "learning_rate": 5.457939386044124e-07,
      "loss": 0.2691,
      "step": 1890
    },
    {
      "epoch": 2.829767441860465,
      "grad_norm": 1.33966014097135,
      "learning_rate": 5.386709993786254e-07,
      "loss": 0.2691,
      "step": 1900
    },
    {
      "epoch": 2.8446511627906976,
      "grad_norm": 1.3388112782785693,
      "learning_rate": 5.321450508454304e-07,
      "loss": 0.2678,
      "step": 1910
    },
    {
      "epoch": 2.8595348837209302,
      "grad_norm": 1.3186034865675909,
      "learning_rate": 5.262178548098479e-07,
      "loss": 0.2668,
      "step": 1920
    },
    {
      "epoch": 2.874418604651163,
      "grad_norm": 1.3351972694593512,
      "learning_rate": 5.208910114321729e-07,
      "loss": 0.2662,
      "step": 1930
    },
    {
      "epoch": 2.8893023255813954,
      "grad_norm": 1.373685192971363,
      "learning_rate": 5.161659587959818e-07,
      "loss": 0.2669,
      "step": 1940
    },
    {
      "epoch": 2.904186046511628,
      "grad_norm": 1.3931156997045835,
      "learning_rate": 5.120439725198932e-07,
      "loss": 0.2663,
      "step": 1950
    },
    {
      "epoch": 2.9190697674418606,
      "grad_norm": 1.2915260316130637,
      "learning_rate": 5.085261654131918e-07,
      "loss": 0.2632,
      "step": 1960
    },
    {
      "epoch": 2.933953488372093,
      "grad_norm": 1.3421708919414022,
      "learning_rate": 5.056134871754014e-07,
      "loss": 0.2705,
      "step": 1970
    },
    {
      "epoch": 2.948837209302326,
      "grad_norm": 1.3219790823176543,
      "learning_rate": 5.03306724139899e-07,
      "loss": 0.2675,
      "step": 1980
    },
    {
      "epoch": 2.963720930232558,
      "grad_norm": 1.3110383511595092,
      "learning_rate": 5.016064990616251e-07,
      "loss": 0.2652,
      "step": 1990
    },
    {
      "epoch": 2.9786046511627906,
      "grad_norm": 1.312152439222843,
      "learning_rate": 5.005132709489625e-07,
      "loss": 0.2666,
      "step": 2000
    },
    {
      "epoch": 2.993488372093023,
      "grad_norm": 1.385415982308588,
      "learning_rate": 5.000273349398159e-07,
      "loss": 0.2664,
      "step": 2010
    },
    {
      "epoch": 2.997953488372093,
      "eval_loss": 0.05878664180636406,
      "eval_runtime": 453.0631,
      "eval_samples_per_second": 39.957,
      "eval_steps_per_second": 0.625,
      "step": 2013
    },
    {
      "epoch": 2.997953488372093,
      "step": 2013,
      "total_flos": 3371640595415040.0,
      "train_loss": 0.382664002355982,
      "train_runtime": 66379.5775,
      "train_samples_per_second": 15.545,
      "train_steps_per_second": 0.03
    }
  ],
  "logging_steps": 10,
  "max_steps": 2013,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3371640595415040.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}