|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 1750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9971428571428576e-05, |
|
"loss": 18.0539, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9e-05, |
|
"loss": 4.1309, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.8826666666666667, |
|
"eval_f1": 0.8778625954198473, |
|
"eval_loss": 0.34147411584854126, |
|
"eval_precision": 0.9153400868306801, |
|
"eval_recall": 0.8433333333333334, |
|
"eval_runtime": 463.3794, |
|
"eval_samples_per_second": 6.474, |
|
"eval_steps_per_second": 0.809, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.6366, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.9403333333333334, |
|
"eval_f1": 0.9423881557772771, |
|
"eval_loss": 0.139574334025383, |
|
"eval_precision": 0.911014312383323, |
|
"eval_recall": 0.976, |
|
"eval_runtime": 463.7246, |
|
"eval_samples_per_second": 6.469, |
|
"eval_steps_per_second": 0.809, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.8166, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.8373333333333334, |
|
"eval_f1": 0.8576429404900816, |
|
"eval_loss": 0.8452138900756836, |
|
"eval_precision": 0.7624481327800829, |
|
"eval_recall": 0.98, |
|
"eval_runtime": 463.2546, |
|
"eval_samples_per_second": 6.476, |
|
"eval_steps_per_second": 0.809, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 1.8141, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.8703333333333333, |
|
"eval_f1": 0.8838459241564646, |
|
"eval_loss": 2.6052372455596924, |
|
"eval_precision": 0.8004326663061114, |
|
"eval_recall": 0.9866666666666667, |
|
"eval_runtime": 463.8217, |
|
"eval_samples_per_second": 6.468, |
|
"eval_steps_per_second": 0.809, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.994, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.8933333333333333, |
|
"eval_f1": 0.9029714978775015, |
|
"eval_loss": 0.5267955660820007, |
|
"eval_precision": 0.8281423804226918, |
|
"eval_recall": 0.9926666666666667, |
|
"eval_runtime": 463.9594, |
|
"eval_samples_per_second": 6.466, |
|
"eval_steps_per_second": 0.808, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.3827, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.9343333333333333, |
|
"eval_f1": 0.9345297441010303, |
|
"eval_loss": 0.296316534280777, |
|
"eval_precision": 0.9317428760768721, |
|
"eval_recall": 0.9373333333333334, |
|
"eval_runtime": 463.5088, |
|
"eval_samples_per_second": 6.472, |
|
"eval_steps_per_second": 0.809, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.5048, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.9526666666666667, |
|
"eval_f1": 0.9523489932885906, |
|
"eval_loss": 0.2187061905860901, |
|
"eval_precision": 0.9587837837837838, |
|
"eval_recall": 0.946, |
|
"eval_runtime": 467.9247, |
|
"eval_samples_per_second": 6.411, |
|
"eval_steps_per_second": 0.801, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.2841, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.8916666666666667, |
|
"eval_f1": 0.8787765759045133, |
|
"eval_loss": 1.027541160583496, |
|
"eval_precision": 0.9974597798475868, |
|
"eval_recall": 0.7853333333333333, |
|
"eval_runtime": 463.3965, |
|
"eval_samples_per_second": 6.474, |
|
"eval_steps_per_second": 0.809, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.3962, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.9496666666666667, |
|
"eval_f1": 0.948656919415165, |
|
"eval_loss": 0.3296962380409241, |
|
"eval_precision": 0.9680777238029147, |
|
"eval_recall": 0.93, |
|
"eval_runtime": 463.326, |
|
"eval_samples_per_second": 6.475, |
|
"eval_steps_per_second": 0.809, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4e-05, |
|
"loss": 0.488, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.9443333333333334, |
|
"eval_f1": 0.9429839535677706, |
|
"eval_loss": 0.4798208773136139, |
|
"eval_precision": 0.966410076976907, |
|
"eval_recall": 0.9206666666666666, |
|
"eval_runtime": 463.3936, |
|
"eval_samples_per_second": 6.474, |
|
"eval_steps_per_second": 0.809, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.4094, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.9096666666666666, |
|
"eval_f1": 0.9166922840454964, |
|
"eval_loss": 0.5374864339828491, |
|
"eval_precision": 0.8505419281232174, |
|
"eval_recall": 0.994, |
|
"eval_runtime": 463.4888, |
|
"eval_samples_per_second": 6.473, |
|
"eval_steps_per_second": 0.809, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.2203, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.957, |
|
"eval_f1": 0.958130477117819, |
|
"eval_loss": 0.18050691485404968, |
|
"eval_precision": 0.9335863377609108, |
|
"eval_recall": 0.984, |
|
"eval_runtime": 463.3488, |
|
"eval_samples_per_second": 6.475, |
|
"eval_steps_per_second": 0.809, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.2526, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.9566666666666667, |
|
"eval_f1": 0.9579288025889968, |
|
"eval_loss": 0.32805779576301575, |
|
"eval_precision": 0.9308176100628931, |
|
"eval_recall": 0.9866666666666667, |
|
"eval_runtime": 464.3941, |
|
"eval_samples_per_second": 6.46, |
|
"eval_steps_per_second": 0.808, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.1888, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.972, |
|
"eval_f1": 0.9723502304147466, |
|
"eval_loss": 0.15383633971214294, |
|
"eval_precision": 0.9603381014304291, |
|
"eval_recall": 0.9846666666666667, |
|
"eval_runtime": 463.4989, |
|
"eval_samples_per_second": 6.473, |
|
"eval_steps_per_second": 0.809, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.1859, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.9783333333333334, |
|
"eval_f1": 0.9781659388646288, |
|
"eval_loss": 0.11581222712993622, |
|
"eval_precision": 0.985781990521327, |
|
"eval_recall": 0.9706666666666667, |
|
"eval_runtime": 463.5881, |
|
"eval_samples_per_second": 6.471, |
|
"eval_steps_per_second": 0.809, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.1007, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.9753333333333334, |
|
"eval_f1": 0.9755129053606882, |
|
"eval_loss": 0.14892521500587463, |
|
"eval_precision": 0.9684625492772667, |
|
"eval_recall": 0.9826666666666667, |
|
"eval_runtime": 463.553, |
|
"eval_samples_per_second": 6.472, |
|
"eval_steps_per_second": 0.809, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.1065, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.9726666666666667, |
|
"eval_f1": 0.9720327421555252, |
|
"eval_loss": 0.15965215861797333, |
|
"eval_precision": 0.9951117318435754, |
|
"eval_recall": 0.95, |
|
"eval_runtime": 463.6429, |
|
"eval_samples_per_second": 6.47, |
|
"eval_steps_per_second": 0.809, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.2868, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.9806666666666667, |
|
"eval_f1": 0.9807180851063829, |
|
"eval_loss": 0.08715511113405228, |
|
"eval_precision": 0.9781167108753316, |
|
"eval_recall": 0.9833333333333333, |
|
"eval_runtime": 463.4149, |
|
"eval_samples_per_second": 6.474, |
|
"eval_steps_per_second": 0.809, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.1818, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.9796666666666667, |
|
"eval_f1": 0.9795781720790091, |
|
"eval_loss": 0.12202201038599014, |
|
"eval_precision": 0.9838601210490922, |
|
"eval_recall": 0.9753333333333334, |
|
"eval_runtime": 464.4798, |
|
"eval_samples_per_second": 6.459, |
|
"eval_steps_per_second": 0.807, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3238, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.9786666666666667, |
|
"eval_f1": 0.9784221173297369, |
|
"eval_loss": 0.16317808628082275, |
|
"eval_precision": 0.9897680763983628, |
|
"eval_recall": 0.9673333333333334, |
|
"eval_runtime": 463.2126, |
|
"eval_samples_per_second": 6.477, |
|
"eval_steps_per_second": 0.81, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.125, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.9796666666666667, |
|
"eval_f1": 0.9796054831160146, |
|
"eval_loss": 0.1086646318435669, |
|
"eval_precision": 0.9825620389000671, |
|
"eval_recall": 0.9766666666666667, |
|
"eval_runtime": 464.4048, |
|
"eval_samples_per_second": 6.46, |
|
"eval_steps_per_second": 0.807, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.1361, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.976, |
|
"eval_f1": 0.9754935330156569, |
|
"eval_loss": 0.15513776242733002, |
|
"eval_precision": 0.9965229485396384, |
|
"eval_recall": 0.9553333333333334, |
|
"eval_runtime": 463.6268, |
|
"eval_samples_per_second": 6.471, |
|
"eval_steps_per_second": 0.809, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.1276, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.9683333333333334, |
|
"eval_f1": 0.9673875729488499, |
|
"eval_loss": 0.20085427165031433, |
|
"eval_precision": 0.997169143665959, |
|
"eval_recall": 0.9393333333333334, |
|
"eval_runtime": 463.7219, |
|
"eval_samples_per_second": 6.469, |
|
"eval_steps_per_second": 0.809, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.1618, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.9766666666666667, |
|
"eval_f1": 0.9762066621346024, |
|
"eval_loss": 0.12355328351259232, |
|
"eval_precision": 0.9958391123439667, |
|
"eval_recall": 0.9573333333333334, |
|
"eval_runtime": 464.5376, |
|
"eval_samples_per_second": 6.458, |
|
"eval_steps_per_second": 0.807, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2574, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.9806666666666667, |
|
"eval_f1": 0.9804054054054054, |
|
"eval_loss": 0.126968115568161, |
|
"eval_precision": 0.9938356164383562, |
|
"eval_recall": 0.9673333333333334, |
|
"eval_runtime": 465.883, |
|
"eval_samples_per_second": 6.439, |
|
"eval_steps_per_second": 0.805, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.1482, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.9576666666666667, |
|
"eval_f1": 0.9558874609239321, |
|
"eval_loss": 0.2774529755115509, |
|
"eval_precision": 0.9978245105148659, |
|
"eval_recall": 0.9173333333333333, |
|
"eval_runtime": 465.4264, |
|
"eval_samples_per_second": 6.446, |
|
"eval_steps_per_second": 0.806, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 0.1156, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.9856666666666667, |
|
"eval_f1": 0.9855849815621857, |
|
"eval_loss": 0.1439618021249771, |
|
"eval_precision": 0.9912339851652057, |
|
"eval_recall": 0.98, |
|
"eval_runtime": 466.4591, |
|
"eval_samples_per_second": 6.431, |
|
"eval_steps_per_second": 0.804, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.2393, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.9406666666666667, |
|
"eval_f1": 0.9369688385269122, |
|
"eval_loss": 0.37739551067352295, |
|
"eval_precision": 0.9992447129909365, |
|
"eval_recall": 0.882, |
|
"eval_runtime": 465.7379, |
|
"eval_samples_per_second": 6.441, |
|
"eval_steps_per_second": 0.805, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.2364, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.984, |
|
"eval_f1": 0.9838601210490923, |
|
"eval_loss": 0.0981753021478653, |
|
"eval_precision": 0.9925373134328358, |
|
"eval_recall": 0.9753333333333334, |
|
"eval_runtime": 464.4225, |
|
"eval_samples_per_second": 6.46, |
|
"eval_steps_per_second": 0.807, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1246, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.985, |
|
"eval_f1": 0.9848637739656912, |
|
"eval_loss": 0.08204901963472366, |
|
"eval_precision": 0.9938900203665988, |
|
"eval_recall": 0.976, |
|
"eval_runtime": 466.5569, |
|
"eval_samples_per_second": 6.43, |
|
"eval_steps_per_second": 0.804, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.1411, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.9873333333333333, |
|
"eval_f1": 0.9872824631860776, |
|
"eval_loss": 0.05377618223428726, |
|
"eval_precision": 0.991263440860215, |
|
"eval_recall": 0.9833333333333333, |
|
"eval_runtime": 464.6089, |
|
"eval_samples_per_second": 6.457, |
|
"eval_steps_per_second": 0.807, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.2055, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.9736666666666667, |
|
"eval_f1": 0.9730099077553809, |
|
"eval_loss": 0.15216030180454254, |
|
"eval_precision": 0.9978976874562018, |
|
"eval_recall": 0.9493333333333334, |
|
"eval_runtime": 465.4747, |
|
"eval_samples_per_second": 6.445, |
|
"eval_steps_per_second": 0.806, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.3018, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.9813333333333333, |
|
"eval_f1": 0.9815059445178336, |
|
"eval_loss": 0.07627255469560623, |
|
"eval_precision": 0.9725130890052356, |
|
"eval_recall": 0.9906666666666667, |
|
"eval_runtime": 464.4389, |
|
"eval_samples_per_second": 6.459, |
|
"eval_steps_per_second": 0.807, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.1702, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.9873333333333333, |
|
"eval_f1": 0.9873586161011311, |
|
"eval_loss": 0.07290682196617126, |
|
"eval_precision": 0.9853917662682603, |
|
"eval_recall": 0.9893333333333333, |
|
"eval_runtime": 464.5835, |
|
"eval_samples_per_second": 6.457, |
|
"eval_steps_per_second": 0.807, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2085, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.986, |
|
"eval_f1": 0.9859060402684564, |
|
"eval_loss": 0.08615541458129883, |
|
"eval_precision": 0.9925675675675676, |
|
"eval_recall": 0.9793333333333333, |
|
"eval_runtime": 464.5251, |
|
"eval_samples_per_second": 6.458, |
|
"eval_steps_per_second": 0.807, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.0899, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.987, |
|
"eval_f1": 0.9869782971619365, |
|
"eval_loss": 0.07593820989131927, |
|
"eval_precision": 0.988628762541806, |
|
"eval_recall": 0.9853333333333333, |
|
"eval_runtime": 464.4867, |
|
"eval_samples_per_second": 6.459, |
|
"eval_steps_per_second": 0.807, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.212, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.9846666666666667, |
|
"eval_f1": 0.9847277556440903, |
|
"eval_loss": 0.08726092427968979, |
|
"eval_precision": 0.9808201058201058, |
|
"eval_recall": 0.9886666666666667, |
|
"eval_runtime": 463.8528, |
|
"eval_samples_per_second": 6.468, |
|
"eval_steps_per_second": 0.808, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.0459, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.981, |
|
"eval_f1": 0.9807237064592493, |
|
"eval_loss": 0.11619190126657486, |
|
"eval_precision": 0.9951956074124915, |
|
"eval_recall": 0.9666666666666667, |
|
"eval_runtime": 463.7918, |
|
"eval_samples_per_second": 6.468, |
|
"eval_steps_per_second": 0.809, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 0.2035, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.9876666666666667, |
|
"eval_f1": 0.9875797247398456, |
|
"eval_loss": 0.07956338673830032, |
|
"eval_precision": 0.9945909398242055, |
|
"eval_recall": 0.9806666666666667, |
|
"eval_runtime": 464.4206, |
|
"eval_samples_per_second": 6.46, |
|
"eval_steps_per_second": 0.807, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0942, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.979, |
|
"eval_f1": 0.979269496544916, |
|
"eval_loss": 0.09173166751861572, |
|
"eval_precision": 0.9668615984405458, |
|
"eval_recall": 0.992, |
|
"eval_runtime": 464.6027, |
|
"eval_samples_per_second": 6.457, |
|
"eval_steps_per_second": 0.807, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9e-06, |
|
"loss": 0.161, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.9873333333333333, |
|
"eval_f1": 0.9872397582269979, |
|
"eval_loss": 0.06276000291109085, |
|
"eval_precision": 0.9945872801082544, |
|
"eval_recall": 0.98, |
|
"eval_runtime": 463.3979, |
|
"eval_samples_per_second": 6.474, |
|
"eval_steps_per_second": 0.809, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.0365, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.9843333333333333, |
|
"eval_f1": 0.9841162554917202, |
|
"eval_loss": 0.08324441313743591, |
|
"eval_precision": 0.997943797121316, |
|
"eval_recall": 0.9706666666666667, |
|
"eval_runtime": 463.6189, |
|
"eval_samples_per_second": 6.471, |
|
"eval_steps_per_second": 0.809, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 0.0508, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.9843333333333333, |
|
"eval_f1": 0.9841162554917202, |
|
"eval_loss": 0.09412873536348343, |
|
"eval_precision": 0.997943797121316, |
|
"eval_recall": 0.9706666666666667, |
|
"eval_runtime": 466.9343, |
|
"eval_samples_per_second": 6.425, |
|
"eval_steps_per_second": 0.803, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0597, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.9873333333333333, |
|
"eval_f1": 0.9872994652406418, |
|
"eval_loss": 0.05775593966245651, |
|
"eval_precision": 0.989946380697051, |
|
"eval_recall": 0.9846666666666667, |
|
"eval_runtime": 464.2967, |
|
"eval_samples_per_second": 6.461, |
|
"eval_steps_per_second": 0.808, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0055, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.9863333333333333, |
|
"eval_f1": 0.9861813279406809, |
|
"eval_loss": 0.07944045215845108, |
|
"eval_precision": 0.9972733469665985, |
|
"eval_recall": 0.9753333333333334, |
|
"eval_runtime": 464.3973, |
|
"eval_samples_per_second": 6.46, |
|
"eval_steps_per_second": 0.807, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.0681, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.9873333333333333, |
|
"eval_f1": 0.9872139973082099, |
|
"eval_loss": 0.07313308119773865, |
|
"eval_precision": 0.9966032608695652, |
|
"eval_recall": 0.978, |
|
"eval_runtime": 464.8711, |
|
"eval_samples_per_second": 6.453, |
|
"eval_steps_per_second": 0.807, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0978, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.9883333333333333, |
|
"eval_f1": 0.9883138564273791, |
|
"eval_loss": 0.058573223650455475, |
|
"eval_precision": 0.9899665551839465, |
|
"eval_recall": 0.9866666666666667, |
|
"eval_runtime": 465.0949, |
|
"eval_samples_per_second": 6.45, |
|
"eval_steps_per_second": 0.806, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.1392, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.9883333333333333, |
|
"eval_f1": 0.9883060474440362, |
|
"eval_loss": 0.055864058434963226, |
|
"eval_precision": 0.9906229068988613, |
|
"eval_recall": 0.986, |
|
"eval_runtime": 464.3364, |
|
"eval_samples_per_second": 6.461, |
|
"eval_steps_per_second": 0.808, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.0432, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.9883333333333333, |
|
"eval_f1": 0.9883060474440362, |
|
"eval_loss": 0.055420782417058945, |
|
"eval_precision": 0.9906229068988613, |
|
"eval_recall": 0.986, |
|
"eval_runtime": 464.6017, |
|
"eval_samples_per_second": 6.457, |
|
"eval_steps_per_second": 0.807, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0006, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.988, |
|
"eval_f1": 0.9879679144385026, |
|
"eval_loss": 0.05567142367362976, |
|
"eval_precision": 0.9906166219839142, |
|
"eval_recall": 0.9853333333333333, |
|
"eval_runtime": 464.5424, |
|
"eval_samples_per_second": 6.458, |
|
"eval_steps_per_second": 0.807, |
|
"step": 1750 |
|
} |
|
], |
|
"max_steps": 1750, |
|
"num_train_epochs": 1, |
|
"total_flos": 1.0151220215808e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|