{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984,
  "eval_steps": 390,
  "global_step": 390,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0128,
      "grad_norm": 182.65389468504085,
      "learning_rate": 5.000000000000001e-07,
      "loss": 3.0348,
      "step": 5
    },
    {
      "epoch": 0.0256,
      "grad_norm": 139.49962686394534,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 2.3592,
      "step": 10
    },
    {
      "epoch": 0.0384,
      "grad_norm": 73.38280389729819,
      "learning_rate": 1.5e-06,
      "loss": 0.8854,
      "step": 15
    },
    {
      "epoch": 0.0512,
      "grad_norm": 10.581257887011294,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.437,
      "step": 20
    },
    {
      "epoch": 0.064,
      "grad_norm": 19.483047427550837,
      "learning_rate": 2.5e-06,
      "loss": 0.2943,
      "step": 25
    },
    {
      "epoch": 0.0768,
      "grad_norm": 11.913519176859392,
      "learning_rate": 3e-06,
      "loss": 0.2626,
      "step": 30
    },
    {
      "epoch": 0.0896,
      "grad_norm": 11.341258695250447,
      "learning_rate": 3.5e-06,
      "loss": 0.234,
      "step": 35
    },
    {
      "epoch": 0.1024,
      "grad_norm": 7.3736324646601865,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.2023,
      "step": 40
    },
    {
      "epoch": 0.1152,
      "grad_norm": 8.384878179412805,
      "learning_rate": 4.5e-06,
      "loss": 0.1772,
      "step": 45
    },
    {
      "epoch": 0.128,
      "grad_norm": 4.321321712106481,
      "learning_rate": 5e-06,
      "loss": 0.1648,
      "step": 50
    },
    {
      "epoch": 0.1408,
      "grad_norm": 8.223137316430781,
      "learning_rate": 5.500000000000001e-06,
      "loss": 0.1712,
      "step": 55
    },
    {
      "epoch": 0.1536,
      "grad_norm": 5.267777408485924,
      "learning_rate": 6e-06,
      "loss": 0.1414,
      "step": 60
    },
    {
      "epoch": 0.1664,
      "grad_norm": 6.820714261557992,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 0.1356,
      "step": 65
    },
    {
      "epoch": 0.1792,
      "grad_norm": 10.169624218399154,
      "learning_rate": 7e-06,
      "loss": 0.1307,
      "step": 70
    },
    {
      "epoch": 0.192,
      "grad_norm": 4.648811703272445,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.1272,
      "step": 75
    },
    {
      "epoch": 0.2048,
      "grad_norm": 7.8160709736421365,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.1238,
      "step": 80
    },
    {
      "epoch": 0.2176,
      "grad_norm": 4.6578435324125005,
      "learning_rate": 8.5e-06,
      "loss": 0.1188,
      "step": 85
    },
    {
      "epoch": 0.2304,
      "grad_norm": 4.334284814503559,
      "learning_rate": 9e-06,
      "loss": 0.1102,
      "step": 90
    },
    {
      "epoch": 0.2432,
      "grad_norm": 5.185815667957919,
      "learning_rate": 9.5e-06,
      "loss": 0.1321,
      "step": 95
    },
    {
      "epoch": 0.256,
      "grad_norm": 5.055357962960289,
      "learning_rate": 1e-05,
      "loss": 0.1152,
      "step": 100
    },
    {
      "epoch": 0.2688,
      "grad_norm": 5.024069288121602,
      "learning_rate": 1.0500000000000001e-05,
      "loss": 0.1226,
      "step": 105
    },
    {
      "epoch": 0.2816,
      "grad_norm": 7.4125001237167805,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 0.1178,
      "step": 110
    },
    {
      "epoch": 0.2944,
      "grad_norm": 6.3618508498418835,
      "learning_rate": 1.15e-05,
      "loss": 0.1149,
      "step": 115
    },
    {
      "epoch": 0.3072,
      "grad_norm": 6.469859734653322,
      "learning_rate": 1.2e-05,
      "loss": 0.1246,
      "step": 120
    },
    {
      "epoch": 0.32,
      "grad_norm": 5.006561942877804,
      "learning_rate": 1.25e-05,
      "loss": 0.2708,
      "step": 125
    },
    {
      "epoch": 0.3328,
      "grad_norm": 3.986948414918879,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 0.1547,
      "step": 130
    },
    {
      "epoch": 0.3456,
      "grad_norm": 9.389803413840562,
      "learning_rate": 1.3500000000000001e-05,
      "loss": 0.1285,
      "step": 135
    },
    {
      "epoch": 0.3584,
      "grad_norm": 11.72955750893183,
      "learning_rate": 1.4e-05,
      "loss": 0.1346,
      "step": 140
    },
    {
      "epoch": 0.3712,
      "grad_norm": 5.134775487996449,
      "learning_rate": 1.45e-05,
      "loss": 0.1382,
      "step": 145
    },
    {
      "epoch": 0.384,
      "grad_norm": 4.474102477655389,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 0.127,
      "step": 150
    },
    {
      "epoch": 0.3968,
      "grad_norm": 2.9780337882613246,
      "learning_rate": 1.55e-05,
      "loss": 0.1232,
      "step": 155
    },
    {
      "epoch": 0.4096,
      "grad_norm": 4.373151697292325,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.1251,
      "step": 160
    },
    {
      "epoch": 0.4224,
      "grad_norm": 9.660073045187783,
      "learning_rate": 1.65e-05,
      "loss": 0.1256,
      "step": 165
    },
    {
      "epoch": 0.4352,
      "grad_norm": 6.853478399209637,
      "learning_rate": 1.7e-05,
      "loss": 0.1414,
      "step": 170
    },
    {
      "epoch": 0.448,
      "grad_norm": 8.677517234598955,
      "learning_rate": 1.7500000000000002e-05,
      "loss": 0.206,
      "step": 175
    },
    {
      "epoch": 0.4608,
      "grad_norm": 8.651004571305064,
      "learning_rate": 1.8e-05,
      "loss": 0.1425,
      "step": 180
    },
    {
      "epoch": 0.4736,
      "grad_norm": 5.901558970919691,
      "learning_rate": 1.8500000000000002e-05,
      "loss": 0.1207,
      "step": 185
    },
    {
      "epoch": 0.4864,
      "grad_norm": 3.882452425752908,
      "learning_rate": 1.9e-05,
      "loss": 0.1184,
      "step": 190
    },
    {
      "epoch": 0.4992,
      "grad_norm": 10.279050422228723,
      "learning_rate": 1.95e-05,
      "loss": 0.1266,
      "step": 195
    },
    {
      "epoch": 0.512,
      "grad_norm": 156.76899002893614,
      "learning_rate": 2e-05,
      "loss": 0.2276,
      "step": 200
    },
    {
      "epoch": 0.5248,
      "grad_norm": 5.748361151055656,
      "learning_rate": 1.9473684210526318e-05,
      "loss": 0.2735,
      "step": 205
    },
    {
      "epoch": 0.5376,
      "grad_norm": 6.707674047117148,
      "learning_rate": 1.894736842105263e-05,
      "loss": 0.2056,
      "step": 210
    },
    {
      "epoch": 0.5504,
      "grad_norm": 4.28932948719806,
      "learning_rate": 1.8421052631578947e-05,
      "loss": 0.1361,
      "step": 215
    },
    {
      "epoch": 0.5632,
      "grad_norm": 3.9017098971521986,
      "learning_rate": 1.7894736842105264e-05,
      "loss": 0.1207,
      "step": 220
    },
    {
      "epoch": 0.576,
      "grad_norm": 6.2162092103600814,
      "learning_rate": 1.736842105263158e-05,
      "loss": 0.1317,
      "step": 225
    },
    {
      "epoch": 0.5888,
      "grad_norm": 5.854326003876861,
      "learning_rate": 1.6842105263157896e-05,
      "loss": 0.1445,
      "step": 230
    },
    {
      "epoch": 0.6016,
      "grad_norm": 6.246048863269034,
      "learning_rate": 1.6315789473684213e-05,
      "loss": 0.1171,
      "step": 235
    },
    {
      "epoch": 0.6144,
      "grad_norm": 3.7109059300429044,
      "learning_rate": 1.578947368421053e-05,
      "loss": 0.1141,
      "step": 240
    },
    {
      "epoch": 0.6272,
      "grad_norm": 3.5668255484420706,
      "learning_rate": 1.5263157894736846e-05,
      "loss": 0.1037,
      "step": 245
    },
    {
      "epoch": 0.64,
      "grad_norm": 4.782059977111824,
      "learning_rate": 1.4736842105263159e-05,
      "loss": 0.1106,
      "step": 250
    },
    {
      "epoch": 0.6528,
      "grad_norm": 10.5984194077511,
      "learning_rate": 1.4210526315789475e-05,
      "loss": 0.1117,
      "step": 255
    },
    {
      "epoch": 0.6656,
      "grad_norm": 3.779958670519677,
      "learning_rate": 1.3684210526315791e-05,
      "loss": 0.1117,
      "step": 260
    },
    {
      "epoch": 0.6784,
      "grad_norm": 5.911614916259078,
      "learning_rate": 1.3157894736842108e-05,
      "loss": 0.1354,
      "step": 265
    },
    {
      "epoch": 0.6912,
      "grad_norm": 5.80946707419217,
      "learning_rate": 1.263157894736842e-05,
      "loss": 0.1169,
      "step": 270
    },
    {
      "epoch": 0.704,
      "grad_norm": 3.2969117252565776,
      "learning_rate": 1.2105263157894737e-05,
      "loss": 0.106,
      "step": 275
    },
    {
      "epoch": 0.7168,
      "grad_norm": 5.038146795065825,
      "learning_rate": 1.1578947368421053e-05,
      "loss": 0.1133,
      "step": 280
    },
    {
      "epoch": 0.7296,
      "grad_norm": 6.738857671001296,
      "learning_rate": 1.105263157894737e-05,
      "loss": 0.1077,
      "step": 285
    },
    {
      "epoch": 0.7424,
      "grad_norm": 4.382581837531665,
      "learning_rate": 1.0526315789473684e-05,
      "loss": 0.1176,
      "step": 290
    },
    {
      "epoch": 0.7552,
      "grad_norm": 4.792805538486606,
      "learning_rate": 1e-05,
      "loss": 0.1035,
      "step": 295
    },
    {
      "epoch": 0.768,
      "grad_norm": 4.972924462435217,
      "learning_rate": 9.473684210526315e-06,
      "loss": 0.1087,
      "step": 300
    },
    {
      "epoch": 0.7808,
      "grad_norm": 15.861511630609764,
      "learning_rate": 8.947368421052632e-06,
      "loss": 0.1009,
      "step": 305
    },
    {
      "epoch": 0.7936,
      "grad_norm": 7.880322698758499,
      "learning_rate": 8.421052631578948e-06,
      "loss": 0.1,
      "step": 310
    },
    {
      "epoch": 0.8064,
      "grad_norm": 3.4154322142376197,
      "learning_rate": 7.894736842105265e-06,
      "loss": 0.0992,
      "step": 315
    },
    {
      "epoch": 0.8192,
      "grad_norm": 4.078488845111544,
      "learning_rate": 7.368421052631579e-06,
      "loss": 0.1054,
      "step": 320
    },
    {
      "epoch": 0.832,
      "grad_norm": 3.5309378515758643,
      "learning_rate": 6.842105263157896e-06,
      "loss": 0.0974,
      "step": 325
    },
    {
      "epoch": 0.8448,
      "grad_norm": 6.343576179177006,
      "learning_rate": 6.31578947368421e-06,
      "loss": 0.1005,
      "step": 330
    },
    {
      "epoch": 0.8576,
      "grad_norm": 3.801584889448848,
      "learning_rate": 5.789473684210527e-06,
      "loss": 0.089,
      "step": 335
    },
    {
      "epoch": 0.8704,
      "grad_norm": 6.129470969543439,
      "learning_rate": 5.263157894736842e-06,
      "loss": 0.0909,
      "step": 340
    },
    {
      "epoch": 0.8832,
      "grad_norm": 5.965460894326214,
      "learning_rate": 4.736842105263158e-06,
      "loss": 0.0901,
      "step": 345
    },
    {
      "epoch": 0.896,
      "grad_norm": 2.642223202809459,
      "learning_rate": 4.210526315789474e-06,
      "loss": 0.0932,
      "step": 350
    },
    {
      "epoch": 0.9088,
      "grad_norm": 3.9893969211639755,
      "learning_rate": 3.6842105263157896e-06,
      "loss": 0.0821,
      "step": 355
    },
    {
      "epoch": 0.9216,
      "grad_norm": 4.071378981195404,
      "learning_rate": 3.157894736842105e-06,
      "loss": 0.0908,
      "step": 360
    },
    {
      "epoch": 0.9344,
      "grad_norm": 3.7877745428251592,
      "learning_rate": 2.631578947368421e-06,
      "loss": 0.0835,
      "step": 365
    },
    {
      "epoch": 0.9472,
      "grad_norm": 5.175552389302117,
      "learning_rate": 2.105263157894737e-06,
      "loss": 0.0812,
      "step": 370
    },
    {
      "epoch": 0.96,
      "grad_norm": 3.966653575974842,
      "learning_rate": 1.5789473684210526e-06,
      "loss": 0.0713,
      "step": 375
    },
    {
      "epoch": 0.9728,
      "grad_norm": 4.135076374239712,
      "learning_rate": 1.0526315789473685e-06,
      "loss": 0.0751,
      "step": 380
    },
    {
      "epoch": 0.9856,
      "grad_norm": 2.978106249425422,
      "learning_rate": 5.263157894736843e-07,
      "loss": 0.0753,
      "step": 385
    },
    {
      "epoch": 0.9984,
      "grad_norm": 3.437868997060425,
      "learning_rate": 0.0,
      "loss": 0.0659,
      "step": 390
    },
    {
      "epoch": 0.9984,
      "eval_accuracy": 0.007717635564240352,
      "eval_loss": 0.0636017844080925,
      "eval_runtime": 11.9633,
      "eval_samples_per_second": 83.589,
      "eval_steps_per_second": 2.675,
      "step": 390
    },
    {
      "epoch": 0.9984,
      "logic_eval_extr_stps.D-0.answer_accuracy": 0.1111111111111111,
      "logic_eval_extr_stps.D-0.proof_accuracy.zero_one": 0.1111111111111111,
      "logic_eval_extr_stps.D-0.rouge1": 15.764555555555555,
      "logic_eval_extr_stps.D-0.rouge2": 1.5705666666666667,
      "logic_eval_extr_stps.D-0.rougeL": 15.764555555555555,
      "logic_eval_extr_stps.D-0.rougeLsum": 15.764555555555555,
      "logic_eval_extr_stps.D-1.answer_accuracy": 0.11764705882352941,
      "logic_eval_extr_stps.D-1.proof_accuracy.zero_one": 0.0,
      "logic_eval_extr_stps.D-1.rouge1": 7.244564705882353,
      "logic_eval_extr_stps.D-1.rouge2": 0.6124411764705883,
      "logic_eval_extr_stps.D-1.rougeL": 6.963717647058823,
      "logic_eval_extr_stps.D-1.rougeLsum": 7.244564705882353,
      "logic_eval_extr_stps.D-2.answer_accuracy": 0.45454545454545453,
      "logic_eval_extr_stps.D-2.proof_accuracy.zero_one": 0.22727272727272727,
      "logic_eval_extr_stps.D-2.rouge1": 21.115345454545455,
      "logic_eval_extr_stps.D-2.rouge2": 7.12185,
      "logic_eval_extr_stps.D-2.rougeL": 20.353104545454546,
      "logic_eval_extr_stps.D-2.rougeLsum": 21.03720909090909,
      "logic_eval_extr_stps.D-3.answer_accuracy": 0.23809523809523808,
      "logic_eval_extr_stps.D-3.proof_accuracy.zero_one": 0.047619047619047616,
      "logic_eval_extr_stps.D-3.rouge1": 10.493052380952381,
      "logic_eval_extr_stps.D-3.rouge2": 2.243357142857143,
      "logic_eval_extr_stps.D-3.rougeL": 9.783571428571431,
      "logic_eval_extr_stps.D-3.rougeLsum": 10.493052380952381,
      "logic_eval_extr_stps.D-None.answer_accuracy": 0.15625,
      "logic_eval_extr_stps.D-None.proof_accuracy.zero_one": 0.15625,
      "logic_eval_extr_stps.D-None.rouge1": 16.7680875,
      "logic_eval_extr_stps.D-None.rouge2": 0.0,
      "logic_eval_extr_stps.D-None.rougeL": 16.7680875,
      "logic_eval_extr_stps.D-None.rougeLsum": 16.7680875,
      "logic_eval_extr_stps.D-all.answer_accuracy": 0.22772277227722773,
      "logic_eval_extr_stps.D-all.proof_accuracy.zero_one": 0.1188118811881188,
      "logic_eval_extr_stps.D-all.rouge1": 14.71791188118812,
      "logic_eval_extr_stps.D-all.rouge2": 2.260770297029703,
      "logic_eval_extr_stps.D-all.rougeL": 14.357092079207925,
      "logic_eval_extr_stps.D-all.rougeLsum": 14.700892079207922,
      "logic_eval_gen_len": 354.359375,
      "logic_eval_runtime": 335.0776,
      "logic_eval_samples_per_second": 0.301,
      "logic_eval_steps_per_second": 0.012,
      "logic_eval_strct.D-0.answer_accuracy": 0.1111111111111111,
      "logic_eval_strct.D-0.proof_accuracy.zero_one": 0.1111111111111111,
      "logic_eval_strct.D-0.rouge1": 15.764555555555555,
      "logic_eval_strct.D-0.rouge2": 1.5705666666666667,
      "logic_eval_strct.D-0.rougeL": 15.764555555555555,
      "logic_eval_strct.D-0.rougeLsum": 15.764555555555555,
      "logic_eval_strct.D-1.answer_accuracy": 0.11764705882352941,
      "logic_eval_strct.D-1.proof_accuracy.zero_one": 0.0,
      "logic_eval_strct.D-1.rouge1": 7.244564705882353,
      "logic_eval_strct.D-1.rouge2": 0.6124411764705883,
      "logic_eval_strct.D-1.rougeL": 6.963717647058823,
      "logic_eval_strct.D-1.rougeLsum": 7.244564705882353,
      "logic_eval_strct.D-2.answer_accuracy": 0.45454545454545453,
      "logic_eval_strct.D-2.proof_accuracy.zero_one": 0.13636363636363635,
      "logic_eval_strct.D-2.rouge1": 21.115345454545455,
      "logic_eval_strct.D-2.rouge2": 7.12185,
      "logic_eval_strct.D-2.rougeL": 20.353104545454546,
      "logic_eval_strct.D-2.rougeLsum": 21.03720909090909,
      "logic_eval_strct.D-3.answer_accuracy": 0.23809523809523808,
      "logic_eval_strct.D-3.proof_accuracy.zero_one": 0.047619047619047616,
      "logic_eval_strct.D-3.rouge1": 10.493052380952381,
      "logic_eval_strct.D-3.rouge2": 2.243357142857143,
      "logic_eval_strct.D-3.rougeL": 9.783571428571431,
      "logic_eval_strct.D-3.rougeLsum": 10.493052380952381,
      "logic_eval_strct.D-None.answer_accuracy": 0.15625,
      "logic_eval_strct.D-None.proof_accuracy.zero_one": 0.15625,
      "logic_eval_strct.D-None.rouge1": 16.7680875,
      "logic_eval_strct.D-None.rouge2": 0.0,
      "logic_eval_strct.D-None.rougeL": 16.7680875,
      "logic_eval_strct.D-None.rougeLsum": 16.7680875,
      "logic_eval_strct.D-all.answer_accuracy": 0.22772277227722773,
      "logic_eval_strct.D-all.proof_accuracy.zero_one": 0.09900990099009901,
      "logic_eval_strct.D-all.rouge1": 14.71791188118812,
      "logic_eval_strct.D-all.rouge2": 2.260770297029703,
      "logic_eval_strct.D-all.rougeL": 14.357092079207925,
      "logic_eval_strct.D-all.rougeLsum": 14.700892079207922,
      "step": 390
    }
  ],
  "logging_steps": 5,
  "max_steps": 390,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 390,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 326527573032960.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}