|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 31.998864926220204, |
|
"global_step": 7040, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8224893386183709, |
|
"eval_loss": 1.1622651815414429, |
|
"eval_runtime": 5.9816, |
|
"eval_samples_per_second": 15.213, |
|
"eval_steps_per_second": 7.69, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8538665635439829, |
|
"eval_loss": 0.9566460251808167, |
|
"eval_runtime": 5.9768, |
|
"eval_samples_per_second": 15.226, |
|
"eval_steps_per_second": 7.696, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.6448863636363636e-05, |
|
"loss": 1.1942, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8709140322043548, |
|
"eval_loss": 0.8456286191940308, |
|
"eval_runtime": 5.9871, |
|
"eval_samples_per_second": 15.199, |
|
"eval_steps_per_second": 7.683, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8801198801198801, |
|
"eval_loss": 0.7718582153320312, |
|
"eval_runtime": 5.6728, |
|
"eval_samples_per_second": 16.041, |
|
"eval_steps_per_second": 8.109, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 4.289772727272727e-05, |
|
"loss": 0.7805, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8871880807364678, |
|
"eval_loss": 0.7224407196044922, |
|
"eval_runtime": 5.988, |
|
"eval_samples_per_second": 15.197, |
|
"eval_steps_per_second": 7.682, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.892816860558796, |
|
"eval_loss": 0.6894707679748535, |
|
"eval_runtime": 5.9958, |
|
"eval_samples_per_second": 15.177, |
|
"eval_steps_per_second": 7.672, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 3.934659090909091e-05, |
|
"loss": 0.6257, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8972103165651553, |
|
"eval_loss": 0.6574080586433411, |
|
"eval_runtime": 5.9909, |
|
"eval_samples_per_second": 15.19, |
|
"eval_steps_per_second": 7.678, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9014426433781273, |
|
"eval_loss": 0.6289474368095398, |
|
"eval_runtime": 5.6911, |
|
"eval_samples_per_second": 15.99, |
|
"eval_steps_per_second": 8.083, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9045470658373884, |
|
"eval_loss": 0.6054205298423767, |
|
"eval_runtime": 5.6786, |
|
"eval_samples_per_second": 16.025, |
|
"eval_steps_per_second": 8.101, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 3.579545454545455e-05, |
|
"loss": 0.5385, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9076622302428754, |
|
"eval_loss": 0.5881273746490479, |
|
"eval_runtime": 6.0014, |
|
"eval_samples_per_second": 15.163, |
|
"eval_steps_per_second": 7.665, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9101865876059424, |
|
"eval_loss": 0.5709272623062134, |
|
"eval_runtime": 5.9886, |
|
"eval_samples_per_second": 15.195, |
|
"eval_steps_per_second": 7.681, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 11.36, |
|
"learning_rate": 3.2244318181818185e-05, |
|
"loss": 0.4778, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9120986540341379, |
|
"eval_loss": 0.5591339468955994, |
|
"eval_runtime": 5.9874, |
|
"eval_samples_per_second": 15.199, |
|
"eval_steps_per_second": 7.683, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9142900110642046, |
|
"eval_loss": 0.5496613383293152, |
|
"eval_runtime": 5.9826, |
|
"eval_samples_per_second": 15.211, |
|
"eval_steps_per_second": 7.689, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"learning_rate": 2.870028409090909e-05, |
|
"loss": 0.427, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9161053999763678, |
|
"eval_loss": 0.5385328531265259, |
|
"eval_runtime": 5.9885, |
|
"eval_samples_per_second": 15.196, |
|
"eval_steps_per_second": 7.681, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9179637566734341, |
|
"eval_loss": 0.5258467793464661, |
|
"eval_runtime": 5.9812, |
|
"eval_samples_per_second": 15.214, |
|
"eval_steps_per_second": 7.691, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 2.5149147727272725e-05, |
|
"loss": 0.394, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9195428227686292, |
|
"eval_loss": 0.5170450806617737, |
|
"eval_runtime": 5.6775, |
|
"eval_samples_per_second": 16.028, |
|
"eval_steps_per_second": 8.102, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9211648566487276, |
|
"eval_loss": 0.5156892538070679, |
|
"eval_runtime": 5.9945, |
|
"eval_samples_per_second": 15.18, |
|
"eval_steps_per_second": 7.674, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9220671801316963, |
|
"eval_loss": 0.5037761926651001, |
|
"eval_runtime": 5.687, |
|
"eval_samples_per_second": 16.001, |
|
"eval_steps_per_second": 8.089, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 2.1598011363636363e-05, |
|
"loss": 0.363, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9233884395174717, |
|
"eval_loss": 0.49766021966934204, |
|
"eval_runtime": 5.6849, |
|
"eval_samples_per_second": 16.007, |
|
"eval_steps_per_second": 8.092, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9236462462268914, |
|
"eval_loss": 0.4975946843624115, |
|
"eval_runtime": 5.9923, |
|
"eval_samples_per_second": 15.186, |
|
"eval_steps_per_second": 7.677, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 20.45, |
|
"learning_rate": 1.8046875000000003e-05, |
|
"loss": 0.3392, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.9246882150107957, |
|
"eval_loss": 0.49241966009140015, |
|
"eval_runtime": 5.9923, |
|
"eval_samples_per_second": 15.186, |
|
"eval_steps_per_second": 7.677, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.9255046029239578, |
|
"eval_loss": 0.4887617826461792, |
|
"eval_runtime": 5.69, |
|
"eval_samples_per_second": 15.993, |
|
"eval_steps_per_second": 8.084, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"learning_rate": 1.4495738636363637e-05, |
|
"loss": 0.33, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.9262028294286359, |
|
"eval_loss": 0.4889785051345825, |
|
"eval_runtime": 6.0024, |
|
"eval_samples_per_second": 15.161, |
|
"eval_steps_per_second": 7.664, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.9267936364710558, |
|
"eval_loss": 0.4856303334236145, |
|
"eval_runtime": 5.9867, |
|
"eval_samples_per_second": 15.2, |
|
"eval_steps_per_second": 7.684, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 1.0951704545454545e-05, |
|
"loss": 0.3058, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.9275348307606373, |
|
"eval_loss": 0.4802783131599426, |
|
"eval_runtime": 5.6869, |
|
"eval_samples_per_second": 16.002, |
|
"eval_steps_per_second": 8.089, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.9277389277389277, |
|
"eval_loss": 0.47845765948295593, |
|
"eval_runtime": 5.9773, |
|
"eval_samples_per_second": 15.224, |
|
"eval_steps_per_second": 7.696, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.9280611861257022, |
|
"eval_loss": 0.4813348948955536, |
|
"eval_runtime": 5.6869, |
|
"eval_samples_per_second": 16.002, |
|
"eval_steps_per_second": 8.089, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"learning_rate": 7.407670454545455e-06, |
|
"loss": 0.2973, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.9281793475341863, |
|
"eval_loss": 0.4798637628555298, |
|
"eval_runtime": 5.6812, |
|
"eval_samples_per_second": 16.018, |
|
"eval_steps_per_second": 8.097, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.9285230898134124, |
|
"eval_loss": 0.47730037569999695, |
|
"eval_runtime": 5.9912, |
|
"eval_samples_per_second": 15.189, |
|
"eval_steps_per_second": 7.678, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 29.54, |
|
"learning_rate": 3.856534090909091e-06, |
|
"loss": 0.2931, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.9285982834369931, |
|
"eval_loss": 0.4778377413749695, |
|
"eval_runtime": 5.9957, |
|
"eval_samples_per_second": 15.177, |
|
"eval_steps_per_second": 7.672, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.9289527676624451, |
|
"eval_loss": 0.4756244122982025, |
|
"eval_runtime": 5.6812, |
|
"eval_samples_per_second": 16.018, |
|
"eval_steps_per_second": 8.097, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 31.82, |
|
"learning_rate": 3.053977272727273e-07, |
|
"loss": 0.2879, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9289527676624451, |
|
"eval_loss": 0.47753414511680603, |
|
"eval_runtime": 5.6774, |
|
"eval_samples_per_second": 16.029, |
|
"eval_steps_per_second": 8.102, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"step": 7040, |
|
"total_flos": 2.4539422830415053e+17, |
|
"train_loss": 0.47427067851478405, |
|
"train_runtime": 10360.152, |
|
"train_samples_per_second": 5.442, |
|
"train_steps_per_second": 0.68 |
|
} |
|
], |
|
"max_steps": 7040, |
|
"num_train_epochs": 32, |
|
"total_flos": 2.4539422830415053e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|