{
  "best_global_step": 10000,
  "best_metric": 0.0,
  "best_model_checkpoint": "./results_code_to_text_codemodernbert_gpt2_all_languages/checkpoint-10000",
  "epoch": 1.151688790348124,
  "eval_steps": 10000,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.011517751734861355,
      "grad_norm": 2.149099826812744,
      "learning_rate": 9.999664806726203e-05,
      "loss": 7.9953,
      "step": 100
    },
    {
      "epoch": 0.02303550346972271,
      "grad_norm": 2.219588041305542,
      "learning_rate": 9.998602828889252e-05,
      "loss": 4.0086,
      "step": 200
    },
    {
      "epoch": 0.03455325520458406,
      "grad_norm": 2.8714349269866943,
      "learning_rate": 9.996813639049668e-05,
      "loss": 3.5128,
      "step": 300
    },
    {
      "epoch": 0.04607100693944542,
      "grad_norm": 2.233255386352539,
      "learning_rate": 9.994297497504253e-05,
      "loss": 2.5397,
      "step": 400
    },
    {
      "epoch": 0.057588758674306774,
      "grad_norm": 3.247396469116211,
      "learning_rate": 9.991054770308961e-05,
      "loss": 1.9996,
      "step": 500
    },
    {
      "epoch": 0.06910651040916813,
      "grad_norm": 3.2835168838500977,
      "learning_rate": 9.987085929225645e-05,
      "loss": 1.7329,
      "step": 600
    },
    {
      "epoch": 0.08062426214402948,
      "grad_norm": 2.6005859375,
      "learning_rate": 9.98239155165343e-05,
      "loss": 1.6232,
      "step": 700
    },
    {
      "epoch": 0.09214201387889084,
      "grad_norm": 1.9255400896072388,
      "learning_rate": 9.976972320544693e-05,
      "loss": 1.5533,
      "step": 800
    },
    {
      "epoch": 0.10365976561375219,
      "grad_norm": 2.3379406929016113,
      "learning_rate": 9.970829024305725e-05,
      "loss": 1.5081,
      "step": 900
    },
    {
      "epoch": 0.11517751734861355,
      "grad_norm": 1.4015233516693115,
      "learning_rate": 9.963962556682021e-05,
      "loss": 1.4448,
      "step": 1000
    },
    {
      "epoch": 0.1266952690834749,
      "grad_norm": 2.204472541809082,
      "learning_rate": 9.956373916628259e-05,
      "loss": 1.3501,
      "step": 1100
    },
    {
      "epoch": 0.13821302081833625,
      "grad_norm": 2.014930248260498,
      "learning_rate": 9.948064208162966e-05,
      "loss": 1.3379,
      "step": 1200
    },
    {
      "epoch": 0.1497307725531976,
      "grad_norm": 1.2242907285690308,
      "learning_rate": 9.939034640207901e-05,
      "loss": 1.3679,
      "step": 1300
    },
    {
      "epoch": 0.16124852428805897,
      "grad_norm": 1.4741770029067993,
      "learning_rate": 9.929286526412182e-05,
      "loss": 1.2859,
      "step": 1400
    },
    {
      "epoch": 0.17276627602292033,
      "grad_norm": 1.6436251401901245,
      "learning_rate": 9.91882128496117e-05,
      "loss": 1.2964,
      "step": 1500
    },
    {
      "epoch": 0.1842840277577817,
      "grad_norm": 1.5630017518997192,
      "learning_rate": 9.907640438370148e-05,
      "loss": 1.3205,
      "step": 1600
    },
    {
      "epoch": 0.19580177949264305,
      "grad_norm": 1.3735089302062988,
      "learning_rate": 9.895745613262816e-05,
      "loss": 1.2516,
      "step": 1700
    },
    {
      "epoch": 0.20731953122750438,
      "grad_norm": 1.54973566532135,
      "learning_rate": 9.883138540134653e-05,
      "loss": 1.2208,
      "step": 1800
    },
    {
      "epoch": 0.21883728296236574,
      "grad_norm": 1.3270553350448608,
      "learning_rate": 9.869821053101151e-05,
      "loss": 1.3005,
      "step": 1900
    },
    {
      "epoch": 0.2303550346972271,
      "grad_norm": 1.321956753730774,
      "learning_rate": 9.85579508963099e-05,
      "loss": 1.2757,
      "step": 2000
    },
    {
      "epoch": 0.24187278643208845,
      "grad_norm": 1.3433573246002197,
      "learning_rate": 9.841062690264163e-05,
      "loss": 1.2155,
      "step": 2100
    },
    {
      "epoch": 0.2533905381669498,
      "grad_norm": 1.2773957252502441,
      "learning_rate": 9.825625998315115e-05,
      "loss": 1.2106,
      "step": 2200
    },
    {
      "epoch": 0.26490828990181114,
      "grad_norm": 1.2487130165100098,
      "learning_rate": 9.809487259560925e-05,
      "loss": 1.1769,
      "step": 2300
    },
    {
      "epoch": 0.2764260416366725,
      "grad_norm": 1.2487260103225708,
      "learning_rate": 9.792648821914587e-05,
      "loss": 1.2285,
      "step": 2400
    },
    {
      "epoch": 0.28794379337153386,
      "grad_norm": 2.003361940383911,
      "learning_rate": 9.775113135083427e-05,
      "loss": 1.1602,
      "step": 2500
    },
    {
      "epoch": 0.2994615451063952,
      "grad_norm": 1.1892813444137573,
      "learning_rate": 9.756882750212708e-05,
      "loss": 1.1457,
      "step": 2600
    },
    {
      "epoch": 0.3109792968412566,
      "grad_norm": 0.9468798637390137,
      "learning_rate": 9.737960319514486e-05,
      "loss": 1.1555,
      "step": 2700
    },
    {
      "epoch": 0.32249704857611794,
      "grad_norm": 1.7301925420761108,
      "learning_rate": 9.718348595881761e-05,
      "loss": 1.1718,
      "step": 2800
    },
    {
      "epoch": 0.3340148003109793,
      "grad_norm": 1.5235856771469116,
      "learning_rate": 9.698050432487968e-05,
      "loss": 1.1165,
      "step": 2900
    },
    {
      "epoch": 0.34553255204584066,
      "grad_norm": 1.4494585990905762,
      "learning_rate": 9.6770687823719e-05,
      "loss": 1.1172,
      "step": 3000
    },
    {
      "epoch": 0.357050303780702,
      "grad_norm": 1.3340733051300049,
      "learning_rate": 9.655406698008081e-05,
      "loss": 1.1798,
      "step": 3100
    },
    {
      "epoch": 0.3685680555155634,
      "grad_norm": 1.0183467864990234,
      "learning_rate": 9.633067330862688e-05,
      "loss": 1.1363,
      "step": 3200
    },
    {
      "epoch": 0.38008580725042473,
      "grad_norm": 0.9531416893005371,
      "learning_rate": 9.61005393093507e-05,
      "loss": 1.2012,
      "step": 3300
    },
    {
      "epoch": 0.3916035589852861,
      "grad_norm": 0.9986298680305481,
      "learning_rate": 9.586369846284912e-05,
      "loss": 1.1742,
      "step": 3400
    },
    {
      "epoch": 0.40312131072014745,
      "grad_norm": 1.2070988416671753,
      "learning_rate": 9.562018522545175e-05,
      "loss": 1.1404,
      "step": 3500
    },
    {
      "epoch": 0.41463906245500876,
      "grad_norm": 0.9883187413215637,
      "learning_rate": 9.537003502420789e-05,
      "loss": 1.1168,
      "step": 3600
    },
    {
      "epoch": 0.4261568141898701,
      "grad_norm": 1.2827179431915283,
      "learning_rate": 9.511328425173265e-05,
      "loss": 1.167,
      "step": 3700
    },
    {
      "epoch": 0.4376745659247315,
      "grad_norm": 1.667656421661377,
      "learning_rate": 9.484997026091238e-05,
      "loss": 1.073,
      "step": 3800
    },
    {
      "epoch": 0.44919231765959283,
      "grad_norm": 1.2254990339279175,
      "learning_rate": 9.458013135947051e-05,
      "loss": 1.0808,
      "step": 3900
    },
    {
      "epoch": 0.4607100693944542,
      "grad_norm": 1.6514670848846436,
      "learning_rate": 9.430380680439435e-05,
      "loss": 1.1928,
      "step": 4000
    },
    {
      "epoch": 0.47222782112931555,
      "grad_norm": 1.2024292945861816,
      "learning_rate": 9.402103679622392e-05,
      "loss": 1.0847,
      "step": 4100
    },
    {
      "epoch": 0.4837455728641769,
      "grad_norm": 1.0424613952636719,
      "learning_rate": 9.373186247320345e-05,
      "loss": 1.0647,
      "step": 4200
    },
    {
      "epoch": 0.49526332459903827,
      "grad_norm": 1.5781737565994263,
      "learning_rate": 9.343632590529642e-05,
      "loss": 1.1076,
      "step": 4300
    },
    {
      "epoch": 0.5067810763338996,
      "grad_norm": 0.9091070890426636,
      "learning_rate": 9.313447008806515e-05,
      "loss": 1.0865,
      "step": 4400
    },
    {
      "epoch": 0.5182988280687609,
      "grad_norm": 1.4019910097122192,
      "learning_rate": 9.282633893641564e-05,
      "loss": 1.074,
      "step": 4500
    },
    {
      "epoch": 0.5298165798036223,
      "grad_norm": 1.0551828145980835,
      "learning_rate": 9.251197727820871e-05,
      "loss": 1.0721,
      "step": 4600
    },
    {
      "epoch": 0.5413343315384836,
      "grad_norm": 0.7485769987106323,
      "learning_rate": 9.219143084773828e-05,
      "loss": 1.0482,
      "step": 4700
    },
    {
      "epoch": 0.552852083273345,
      "grad_norm": 1.1340844631195068,
      "learning_rate": 9.186474627907784e-05,
      "loss": 1.1191,
      "step": 4800
    },
    {
      "epoch": 0.5643698350082064,
      "grad_norm": 1.5390311479568481,
      "learning_rate": 9.153197109929595e-05,
      "loss": 1.1175,
      "step": 4900
    },
    {
      "epoch": 0.5758875867430677,
      "grad_norm": 1.3286751508712769,
      "learning_rate": 9.119315372154189e-05,
      "loss": 1.0089,
      "step": 5000
    },
    {
      "epoch": 0.5874053384779291,
      "grad_norm": 1.0134896039962769,
      "learning_rate": 9.084834343800234e-05,
      "loss": 1.0604,
      "step": 5100
    },
    {
      "epoch": 0.5989230902127904,
      "grad_norm": 1.1101710796356201,
      "learning_rate": 9.049759041273023e-05,
      "loss": 1.0266,
      "step": 5200
    },
    {
      "epoch": 0.6104408419476518,
      "grad_norm": 0.9307985901832581,
      "learning_rate": 9.014094567434668e-05,
      "loss": 1.042,
      "step": 5300
    },
    {
      "epoch": 0.6219585936825132,
      "grad_norm": 1.295506238937378,
      "learning_rate": 8.977846110861722e-05,
      "loss": 1.102,
      "step": 5400
    },
    {
      "epoch": 0.6334763454173745,
      "grad_norm": 1.0131995677947998,
      "learning_rate": 8.941018945090334e-05,
      "loss": 1.0017,
      "step": 5500
    },
    {
      "epoch": 0.6449940971522359,
      "grad_norm": 0.9528135061264038,
      "learning_rate": 8.90361842784903e-05,
      "loss": 1.0597,
      "step": 5600
    },
    {
      "epoch": 0.6565118488870972,
      "grad_norm": 0.8767136335372925,
      "learning_rate": 8.86565000027926e-05,
      "loss": 1.0354,
      "step": 5700
    },
    {
      "epoch": 0.6680296006219586,
      "grad_norm": 0.7215752601623535,
      "learning_rate": 8.8271191861438e-05,
      "loss": 1.0266,
      "step": 5800
    },
    {
      "epoch": 0.67954735235682,
      "grad_norm": 0.9168160557746887,
      "learning_rate": 8.788031591023145e-05,
      "loss": 0.9974,
      "step": 5900
    },
    {
      "epoch": 0.6910651040916813,
      "grad_norm": 0.9805298447608948,
      "learning_rate": 8.748392901499985e-05,
      "loss": 1.0205,
      "step": 6000
    },
    {
      "epoch": 0.7025828558265427,
      "grad_norm": 0.9025911092758179,
      "learning_rate": 8.708208884331907e-05,
      "loss": 1.0282,
      "step": 6100
    },
    {
      "epoch": 0.714100607561404,
      "grad_norm": 0.7643795609474182,
      "learning_rate": 8.667485385612433e-05,
      "loss": 1.0281,
      "step": 6200
    },
    {
      "epoch": 0.7256183592962654,
      "grad_norm": 1.015793800354004,
      "learning_rate": 8.6262283299205e-05,
      "loss": 1.0153,
      "step": 6300
    },
    {
      "epoch": 0.7371361110311268,
      "grad_norm": 0.9706717133522034,
      "learning_rate": 8.58444371945855e-05,
      "loss": 0.9879,
      "step": 6400
    },
    {
      "epoch": 0.7486538627659881,
      "grad_norm": 0.8967713117599487,
      "learning_rate": 8.5421376331793e-05,
      "loss": 1.044,
      "step": 6500
    },
    {
      "epoch": 0.7601716145008495,
      "grad_norm": 1.0314196348190308,
      "learning_rate": 8.499316225901364e-05,
      "loss": 1.0163,
      "step": 6600
    },
    {
      "epoch": 0.7716893662357108,
      "grad_norm": 0.9607068300247192,
      "learning_rate": 8.455985727413825e-05,
      "loss": 1.0047,
      "step": 6700
    },
    {
      "epoch": 0.7832071179705722,
      "grad_norm": 0.8781064748764038,
      "learning_rate": 8.41215244156991e-05,
      "loss": 0.9683,
      "step": 6800
    },
    {
      "epoch": 0.7947248697054335,
      "grad_norm": 0.7523179054260254,
      "learning_rate": 8.367822745369891e-05,
      "loss": 0.9814,
      "step": 6900
    },
    {
      "epoch": 0.8062426214402949,
      "grad_norm": 0.7084505558013916,
      "learning_rate": 8.323003088033327e-05,
      "loss": 1.0103,
      "step": 7000
    },
    {
      "epoch": 0.8177603731751563,
      "grad_norm": 0.8509836792945862,
      "learning_rate": 8.277699990060826e-05,
      "loss": 0.947,
      "step": 7100
    },
    {
      "epoch": 0.8292781249100175,
      "grad_norm": 0.9924744963645935,
      "learning_rate": 8.231920042285408e-05,
      "loss": 0.9985,
      "step": 7200
    },
    {
      "epoch": 0.8407958766448789,
      "grad_norm": 0.9982401132583618,
      "learning_rate": 8.185669904913669e-05,
      "loss": 1.004,
      "step": 7300
    },
    {
      "epoch": 0.8523136283797402,
      "grad_norm": 0.9341840744018555,
      "learning_rate": 8.138956306556813e-05,
      "loss": 1.005,
      "step": 7400
    },
    {
      "epoch": 0.8638313801146016,
      "grad_norm": 1.4367643594741821,
      "learning_rate": 8.09178604325177e-05,
      "loss": 0.9949,
      "step": 7500
    },
    {
      "epoch": 0.875349131849463,
      "grad_norm": 0.7783756256103516,
      "learning_rate": 8.044165977472476e-05,
      "loss": 0.9786,
      "step": 7600
    },
    {
      "epoch": 0.8868668835843243,
      "grad_norm": 1.033542513847351,
      "learning_rate": 7.99610303713151e-05,
      "loss": 0.9979,
      "step": 7700
    },
    {
      "epoch": 0.8983846353191857,
      "grad_norm": 0.7946944832801819,
      "learning_rate": 7.947604214572189e-05,
      "loss": 0.9736,
      "step": 7800
    },
    {
      "epoch": 0.909902387054047,
      "grad_norm": 0.8648232221603394,
      "learning_rate": 7.89867656555131e-05,
      "loss": 0.9916,
      "step": 7900
    },
    {
      "epoch": 0.9214201387889084,
      "grad_norm": 1.1316163539886475,
      "learning_rate": 7.849327208212653e-05,
      "loss": 1.0083,
      "step": 8000
    },
    {
      "epoch": 0.9329378905237697,
      "grad_norm": 0.731547474861145,
      "learning_rate": 7.799563322051407e-05,
      "loss": 0.9868,
      "step": 8100
    },
    {
      "epoch": 0.9444556422586311,
      "grad_norm": 0.7558659315109253,
      "learning_rate": 7.749392146869688e-05,
      "loss": 0.9378,
      "step": 8200
    },
    {
      "epoch": 0.9559733939934925,
      "grad_norm": 1.1830765008926392,
      "learning_rate": 7.698820981723252e-05,
      "loss": 0.947,
      "step": 8300
    },
    {
      "epoch": 0.9674911457283538,
      "grad_norm": 1.1989309787750244,
      "learning_rate": 7.647857183859626e-05,
      "loss": 0.9973,
      "step": 8400
    },
    {
      "epoch": 0.9790088974632152,
      "grad_norm": 0.7769212126731873,
      "learning_rate": 7.596508167647738e-05,
      "loss": 0.983,
      "step": 8500
    },
    {
      "epoch": 0.9905266491980765,
      "grad_norm": 0.8555830121040344,
      "learning_rate": 7.544781403499261e-05,
      "loss": 0.9472,
      "step": 8600
    },
    {
      "epoch": 1.0019580177949263,
      "grad_norm": 0.9663079380989075,
      "learning_rate": 7.492684416781785e-05,
      "loss": 0.9254,
      "step": 8700
    },
    {
      "epoch": 1.0134757695297878,
      "grad_norm": 1.0730838775634766,
      "learning_rate": 7.44022478672402e-05,
      "loss": 0.7473,
      "step": 8800
    },
    {
      "epoch": 1.024993521264649,
      "grad_norm": 1.2075188159942627,
      "learning_rate": 7.38741014531313e-05,
      "loss": 0.7745,
      "step": 8900
    },
    {
      "epoch": 1.0365112729995105,
      "grad_norm": 0.724323034286499,
      "learning_rate": 7.334248176184411e-05,
      "loss": 0.7592,
      "step": 9000
    },
    {
      "epoch": 1.0480290247343718,
      "grad_norm": 0.9635033011436462,
      "learning_rate": 7.280746613503463e-05,
      "loss": 0.7708,
      "step": 9100
    },
    {
      "epoch": 1.0595467764692332,
      "grad_norm": 1.1375808715820312,
      "learning_rate": 7.226913240840992e-05,
      "loss": 0.7739,
      "step": 9200
    },
    {
      "epoch": 1.0710645282040945,
      "grad_norm": 1.0138040781021118,
      "learning_rate": 7.172755890040428e-05,
      "loss": 0.7582,
      "step": 9300
    },
    {
      "epoch": 1.082582279938956,
      "grad_norm": 0.9169549345970154,
      "learning_rate": 7.118282440078535e-05,
      "loss": 0.7488,
      "step": 9400
    },
    {
      "epoch": 1.0941000316738172,
      "grad_norm": 0.8004472851753235,
      "learning_rate": 7.063500815919148e-05,
      "loss": 0.7401,
      "step": 9500
    },
    {
      "epoch": 1.1056177834086787,
      "grad_norm": 1.0395482778549194,
      "learning_rate": 7.008418987360222e-05,
      "loss": 0.7769,
      "step": 9600
    },
    {
      "epoch": 1.11713553514354,
      "grad_norm": 1.0113108158111572,
      "learning_rate": 6.953044967874367e-05,
      "loss": 0.8,
      "step": 9700
    },
    {
      "epoch": 1.1286532868784014,
      "grad_norm": 0.8164701461791992,
      "learning_rate": 6.897386813443019e-05,
      "loss": 0.7473,
      "step": 9800
    },
    {
      "epoch": 1.1401710386132626,
      "grad_norm": 0.7086979150772095,
      "learning_rate": 6.84145262138444e-05,
      "loss": 0.7732,
      "step": 9900
    },
    {
      "epoch": 1.151688790348124,
      "grad_norm": 1.1815799474716187,
      "learning_rate": 6.785250529175679e-05,
      "loss": 0.7656,
      "step": 10000
    },
    {
      "epoch": 1.151688790348124,
      "eval_gen_len": 0.0,
      "eval_loss": 0.12472382932901382,
      "eval_rouge1": 0.0,
      "eval_rouge2": 0.0,
      "eval_rougeL": 0.0,
      "eval_rougeLsum": 0.0,
      "eval_runtime": 474.3304,
      "eval_samples_per_second": 4.025,
      "eval_steps_per_second": 0.673,
      "step": 10000
    }
  ],
  "logging_steps": 100,
  "max_steps": 26046,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 10000,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.24479560268356e+18,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}