|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 36.0, |
|
"global_step": 14040, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5992906078421846, |
|
"eval_loss": 1.33469557762146, |
|
"eval_runtime": 5.8169, |
|
"eval_samples_per_second": 29.397, |
|
"eval_steps_per_second": 14.785, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 9.644586894586896e-06, |
|
"loss": 1.5408, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6796235478653008, |
|
"eval_loss": 1.0698518753051758, |
|
"eval_runtime": 5.8146, |
|
"eval_samples_per_second": 29.409, |
|
"eval_steps_per_second": 14.79, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 9.28917378917379e-06, |
|
"loss": 1.1283, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7372856151482106, |
|
"eval_loss": 0.8751375675201416, |
|
"eval_runtime": 5.8077, |
|
"eval_samples_per_second": 29.444, |
|
"eval_steps_per_second": 14.808, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 8.933048433048434e-06, |
|
"loss": 0.9078, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7711176956331666, |
|
"eval_loss": 0.7534294128417969, |
|
"eval_runtime": 5.8229, |
|
"eval_samples_per_second": 29.367, |
|
"eval_steps_per_second": 14.769, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.802166420482521, |
|
"eval_loss": 0.6710610389709473, |
|
"eval_runtime": 5.6976, |
|
"eval_samples_per_second": 30.013, |
|
"eval_steps_per_second": 15.094, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 8.576923076923077e-06, |
|
"loss": 0.7705, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8168786692759296, |
|
"eval_loss": 0.6077755689620972, |
|
"eval_runtime": 5.8114, |
|
"eval_samples_per_second": 29.425, |
|
"eval_steps_per_second": 14.798, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 8.220797720797722e-06, |
|
"loss": 0.6863, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.831802051431627, |
|
"eval_loss": 0.5667673945426941, |
|
"eval_runtime": 5.8128, |
|
"eval_samples_per_second": 29.418, |
|
"eval_steps_per_second": 14.795, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 7.864672364672366e-06, |
|
"loss": 0.6277, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8386034912718204, |
|
"eval_loss": 0.546131432056427, |
|
"eval_runtime": 5.8073, |
|
"eval_samples_per_second": 29.446, |
|
"eval_steps_per_second": 14.809, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 7.508547008547009e-06, |
|
"loss": 0.5863, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8513571943310113, |
|
"eval_loss": 0.5142761468887329, |
|
"eval_runtime": 5.8058, |
|
"eval_samples_per_second": 29.453, |
|
"eval_steps_per_second": 14.813, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8522174818930449, |
|
"eval_loss": 0.49920225143432617, |
|
"eval_runtime": 5.8205, |
|
"eval_samples_per_second": 29.379, |
|
"eval_steps_per_second": 14.775, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 7.152421652421653e-06, |
|
"loss": 0.5564, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8533209429717304, |
|
"eval_loss": 0.49400192499160767, |
|
"eval_runtime": 5.8142, |
|
"eval_samples_per_second": 29.411, |
|
"eval_steps_per_second": 14.791, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 6.796296296296296e-06, |
|
"loss": 0.5199, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8632922665875019, |
|
"eval_loss": 0.4726846218109131, |
|
"eval_runtime": 5.8155, |
|
"eval_samples_per_second": 29.404, |
|
"eval_steps_per_second": 14.788, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 6.440170940170941e-06, |
|
"loss": 0.5025, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8637540927527733, |
|
"eval_loss": 0.4585917592048645, |
|
"eval_runtime": 5.8153, |
|
"eval_samples_per_second": 29.405, |
|
"eval_steps_per_second": 14.789, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8673048600883653, |
|
"eval_loss": 0.45485442876815796, |
|
"eval_runtime": 5.8178, |
|
"eval_samples_per_second": 29.393, |
|
"eval_steps_per_second": 14.782, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 6.084045584045585e-06, |
|
"loss": 0.4814, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8698035411108416, |
|
"eval_loss": 0.44424179196357727, |
|
"eval_runtime": 5.7973, |
|
"eval_samples_per_second": 29.496, |
|
"eval_steps_per_second": 14.834, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 5.727920227920228e-06, |
|
"loss": 0.4746, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8749573607523999, |
|
"eval_loss": 0.43056586384773254, |
|
"eval_runtime": 5.6925, |
|
"eval_samples_per_second": 30.04, |
|
"eval_steps_per_second": 15.108, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 5.371794871794872e-06, |
|
"loss": 0.4527, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.874227107665129, |
|
"eval_loss": 0.42905324697494507, |
|
"eval_runtime": 5.8566, |
|
"eval_samples_per_second": 29.198, |
|
"eval_steps_per_second": 14.684, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"learning_rate": 5.016381766381767e-06, |
|
"loss": 0.4382, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8751032706419789, |
|
"eval_loss": 0.4213222861289978, |
|
"eval_runtime": 5.8093, |
|
"eval_samples_per_second": 29.435, |
|
"eval_steps_per_second": 14.804, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8751214299591995, |
|
"eval_loss": 0.41926833987236023, |
|
"eval_runtime": 5.8022, |
|
"eval_samples_per_second": 29.472, |
|
"eval_steps_per_second": 14.822, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 4.6602564102564106e-06, |
|
"loss": 0.4328, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8759944995580002, |
|
"eval_loss": 0.41431769728660583, |
|
"eval_runtime": 5.8137, |
|
"eval_samples_per_second": 29.413, |
|
"eval_steps_per_second": 14.793, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 20.51, |
|
"learning_rate": 4.304131054131054e-06, |
|
"loss": 0.4191, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.8835775987576434, |
|
"eval_loss": 0.4071265459060669, |
|
"eval_runtime": 5.8164, |
|
"eval_samples_per_second": 29.4, |
|
"eval_steps_per_second": 14.786, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 21.79, |
|
"learning_rate": 3.948717948717949e-06, |
|
"loss": 0.4106, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.881939052795031, |
|
"eval_loss": 0.3980366587638855, |
|
"eval_runtime": 5.875, |
|
"eval_samples_per_second": 29.106, |
|
"eval_steps_per_second": 14.638, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.8821786026625207, |
|
"eval_loss": 0.39872363209724426, |
|
"eval_runtime": 5.8105, |
|
"eval_samples_per_second": 29.43, |
|
"eval_steps_per_second": 14.801, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 23.08, |
|
"learning_rate": 3.592592592592593e-06, |
|
"loss": 0.4037, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8819395993222366, |
|
"eval_loss": 0.40265128016471863, |
|
"eval_runtime": 5.8012, |
|
"eval_samples_per_second": 29.477, |
|
"eval_steps_per_second": 14.824, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 24.36, |
|
"learning_rate": 3.2364672364672365e-06, |
|
"loss": 0.3893, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.8892501819946614, |
|
"eval_loss": 0.3867790400981903, |
|
"eval_runtime": 5.8209, |
|
"eval_samples_per_second": 29.377, |
|
"eval_steps_per_second": 14.774, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 2.8803418803418804e-06, |
|
"loss": 0.3991, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.8846003326484688, |
|
"eval_loss": 0.3882477581501007, |
|
"eval_runtime": 5.8184, |
|
"eval_samples_per_second": 29.389, |
|
"eval_steps_per_second": 14.781, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 26.92, |
|
"learning_rate": 2.5242165242165246e-06, |
|
"loss": 0.3786, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.8858714334822964, |
|
"eval_loss": 0.3939129710197449, |
|
"eval_runtime": 5.8222, |
|
"eval_samples_per_second": 29.37, |
|
"eval_steps_per_second": 14.771, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8847989764283254, |
|
"eval_loss": 0.39587706327438354, |
|
"eval_runtime": 5.8063, |
|
"eval_samples_per_second": 29.451, |
|
"eval_steps_per_second": 14.812, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 28.21, |
|
"learning_rate": 2.168803418803419e-06, |
|
"loss": 0.38, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.8849687976020835, |
|
"eval_loss": 0.3949810862541199, |
|
"eval_runtime": 5.8138, |
|
"eval_samples_per_second": 29.413, |
|
"eval_steps_per_second": 14.792, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 29.49, |
|
"learning_rate": 1.8126780626780629e-06, |
|
"loss": 0.3764, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.8893153879792042, |
|
"eval_loss": 0.3783101439476013, |
|
"eval_runtime": 5.8039, |
|
"eval_samples_per_second": 29.463, |
|
"eval_steps_per_second": 14.818, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"learning_rate": 1.4565527065527065e-06, |
|
"loss": 0.3708, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.8890792500856626, |
|
"eval_loss": 0.3798995912075043, |
|
"eval_runtime": 5.7021, |
|
"eval_samples_per_second": 29.989, |
|
"eval_steps_per_second": 15.082, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8867172306495527, |
|
"eval_loss": 0.39150363206863403, |
|
"eval_runtime": 5.8791, |
|
"eval_samples_per_second": 29.086, |
|
"eval_steps_per_second": 14.628, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 1.1004273504273506e-06, |
|
"loss": 0.3656, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.8902530694061639, |
|
"eval_loss": 0.3780055344104767, |
|
"eval_runtime": 5.8784, |
|
"eval_samples_per_second": 29.09, |
|
"eval_steps_per_second": 14.63, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 7.443019943019944e-07, |
|
"loss": 0.3617, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.8873723487824038, |
|
"eval_loss": 0.38049712777137756, |
|
"eval_runtime": 5.8779, |
|
"eval_samples_per_second": 29.092, |
|
"eval_steps_per_second": 14.631, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 34.62, |
|
"learning_rate": 3.8817663817663825e-07, |
|
"loss": 0.361, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.8919778767559101, |
|
"eval_loss": 0.3775971233844757, |
|
"eval_runtime": 5.8063, |
|
"eval_samples_per_second": 29.451, |
|
"eval_steps_per_second": 14.812, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 35.9, |
|
"learning_rate": 3.205128205128205e-08, |
|
"loss": 0.3595, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8888084202394747, |
|
"eval_loss": 0.3711872100830078, |
|
"eval_runtime": 5.8621, |
|
"eval_samples_per_second": 29.171, |
|
"eval_steps_per_second": 14.671, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"step": 14040, |
|
"total_flos": 3.509780816886497e+17, |
|
"train_loss": 0.5379996796958467, |
|
"train_runtime": 13714.1823, |
|
"train_samples_per_second": 8.19, |
|
"train_steps_per_second": 1.024 |
|
} |
|
], |
|
"max_steps": 14040, |
|
"num_train_epochs": 36, |
|
"total_flos": 3.509780816886497e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|