|
{ |
|
"best_metric": 0.6745283018867925, |
|
"best_model_checkpoint": "videomae-base-finetuned-engine-subset-20230313/checkpoint-988", |
|
"epoch": 29.00720720720721, |
|
"global_step": 1110, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.504504504504505e-06, |
|
"loss": 2.7278, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.00900900900901e-06, |
|
"loss": 2.747, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3513513513513515e-05, |
|
"loss": 2.6212, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.37735849056603776, |
|
"eval_loss": 2.362868070602417, |
|
"eval_runtime": 69.3989, |
|
"eval_samples_per_second": 6.11, |
|
"eval_steps_per_second": 1.023, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.801801801801802e-05, |
|
"loss": 2.6599, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.2522522522522523e-05, |
|
"loss": 2.5895, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.702702702702703e-05, |
|
"loss": 2.5383, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.153153153153153e-05, |
|
"loss": 2.455, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_accuracy": 0.2169811320754717, |
|
"eval_loss": 2.3674099445343018, |
|
"eval_runtime": 70.0897, |
|
"eval_samples_per_second": 6.049, |
|
"eval_steps_per_second": 1.013, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.603603603603604e-05, |
|
"loss": 2.421, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.0540540540540545e-05, |
|
"loss": 2.4139, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.5045045045045046e-05, |
|
"loss": 2.4999, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.954954954954955e-05, |
|
"loss": 2.4311, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_accuracy": 0.3231132075471698, |
|
"eval_loss": 2.2190816402435303, |
|
"eval_runtime": 71.2703, |
|
"eval_samples_per_second": 5.949, |
|
"eval_steps_per_second": 0.996, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.954954954954955e-05, |
|
"loss": 2.5089, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.9049049049049054e-05, |
|
"loss": 2.4986, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.854854854854855e-05, |
|
"loss": 2.3317, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.804804804804805e-05, |
|
"loss": 2.2768, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_accuracy": 0.3608490566037736, |
|
"eval_loss": 2.1226866245269775, |
|
"eval_runtime": 70.1636, |
|
"eval_samples_per_second": 6.043, |
|
"eval_steps_per_second": 1.012, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.754754754754755e-05, |
|
"loss": 2.0041, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.704704704704705e-05, |
|
"loss": 2.2092, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.654654654654655e-05, |
|
"loss": 1.9442, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.604604604604605e-05, |
|
"loss": 1.7528, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_accuracy": 0.4363207547169811, |
|
"eval_loss": 1.729621171951294, |
|
"eval_runtime": 69.2109, |
|
"eval_samples_per_second": 6.126, |
|
"eval_steps_per_second": 1.026, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.5545545545545545e-05, |
|
"loss": 1.6692, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 4.5045045045045046e-05, |
|
"loss": 1.8567, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 4.4544544544544546e-05, |
|
"loss": 1.5381, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"eval_accuracy": 0.4339622641509434, |
|
"eval_loss": 1.5015883445739746, |
|
"eval_runtime": 71.2754, |
|
"eval_samples_per_second": 5.949, |
|
"eval_steps_per_second": 0.996, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.404404404404405e-05, |
|
"loss": 1.4846, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 4.354354354354355e-05, |
|
"loss": 1.3143, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 4.304304304304305e-05, |
|
"loss": 1.2959, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 4.254254254254255e-05, |
|
"loss": 1.407, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"eval_accuracy": 0.5448113207547169, |
|
"eval_loss": 1.2878233194351196, |
|
"eval_runtime": 70.9118, |
|
"eval_samples_per_second": 5.979, |
|
"eval_steps_per_second": 1.001, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.204204204204204e-05, |
|
"loss": 1.3608, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 4.1541541541541544e-05, |
|
"loss": 1.5729, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 4.1041041041041045e-05, |
|
"loss": 1.3095, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 4.0540540540540545e-05, |
|
"loss": 1.1053, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"eval_accuracy": 0.4009433962264151, |
|
"eval_loss": 1.5210211277008057, |
|
"eval_runtime": 68.6229, |
|
"eval_samples_per_second": 6.179, |
|
"eval_steps_per_second": 1.035, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 4.0040040040040046e-05, |
|
"loss": 1.2462, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 3.953953953953955e-05, |
|
"loss": 1.114, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 3.903903903903904e-05, |
|
"loss": 1.2971, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 3.8538538538538534e-05, |
|
"loss": 1.0893, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"eval_accuracy": 0.46226415094339623, |
|
"eval_loss": 1.3901519775390625, |
|
"eval_runtime": 70.4934, |
|
"eval_samples_per_second": 6.015, |
|
"eval_steps_per_second": 1.007, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 3.8038038038038035e-05, |
|
"loss": 1.3258, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 3.7537537537537536e-05, |
|
"loss": 1.1349, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 1.0029, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.653653653653654e-05, |
|
"loss": 0.8136, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"eval_accuracy": 0.4033018867924528, |
|
"eval_loss": 1.6456053256988525, |
|
"eval_runtime": 71.0727, |
|
"eval_samples_per_second": 5.966, |
|
"eval_steps_per_second": 0.999, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 3.603603603603604e-05, |
|
"loss": 1.1048, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 3.553553553553554e-05, |
|
"loss": 1.2661, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 3.503503503503503e-05, |
|
"loss": 0.9565, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"eval_accuracy": 0.5613207547169812, |
|
"eval_loss": 1.18259859085083, |
|
"eval_runtime": 70.181, |
|
"eval_samples_per_second": 6.042, |
|
"eval_steps_per_second": 1.012, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.453453453453453e-05, |
|
"loss": 1.0921, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 3.4034034034034034e-05, |
|
"loss": 0.7383, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 3.3533533533533535e-05, |
|
"loss": 0.926, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 3.3033033033033035e-05, |
|
"loss": 1.0147, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"eval_accuracy": 0.5117924528301887, |
|
"eval_loss": 1.2098637819290161, |
|
"eval_runtime": 70.8824, |
|
"eval_samples_per_second": 5.982, |
|
"eval_steps_per_second": 1.002, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.2532532532532536e-05, |
|
"loss": 0.9178, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 3.203203203203203e-05, |
|
"loss": 0.8994, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 3.153153153153153e-05, |
|
"loss": 0.8648, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 3.103103103103103e-05, |
|
"loss": 0.9125, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"eval_accuracy": 0.5495283018867925, |
|
"eval_loss": 1.1849919557571411, |
|
"eval_runtime": 70.4782, |
|
"eval_samples_per_second": 6.016, |
|
"eval_steps_per_second": 1.007, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 3.053053053053053e-05, |
|
"loss": 0.8926, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 3.0030030030030033e-05, |
|
"loss": 0.8618, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 2.952952952952953e-05, |
|
"loss": 0.8057, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 2.902902902902903e-05, |
|
"loss": 0.7091, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"eval_accuracy": 0.535377358490566, |
|
"eval_loss": 1.2324182987213135, |
|
"eval_runtime": 70.9353, |
|
"eval_samples_per_second": 5.977, |
|
"eval_steps_per_second": 1.001, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 2.852852852852853e-05, |
|
"loss": 0.74, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 2.8028028028028032e-05, |
|
"loss": 0.7484, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 2.752752752752753e-05, |
|
"loss": 0.7962, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 2.702702702702703e-05, |
|
"loss": 0.7361, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"eval_accuracy": 0.6226415094339622, |
|
"eval_loss": 1.022481083869934, |
|
"eval_runtime": 70.6845, |
|
"eval_samples_per_second": 5.998, |
|
"eval_steps_per_second": 1.004, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 2.652652652652653e-05, |
|
"loss": 0.8396, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"learning_rate": 2.6026026026026028e-05, |
|
"loss": 0.7193, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 15.03, |
|
"learning_rate": 2.552552552552553e-05, |
|
"loss": 0.6979, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 15.03, |
|
"eval_accuracy": 0.5589622641509434, |
|
"eval_loss": 1.0737839937210083, |
|
"eval_runtime": 69.8991, |
|
"eval_samples_per_second": 6.066, |
|
"eval_steps_per_second": 1.016, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2.502502502502503e-05, |
|
"loss": 0.6869, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 2.4524524524524527e-05, |
|
"loss": 0.6704, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"learning_rate": 2.4024024024024024e-05, |
|
"loss": 0.8011, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 2.3523523523523525e-05, |
|
"loss": 0.5265, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"eval_accuracy": 0.5872641509433962, |
|
"eval_loss": 1.1062343120574951, |
|
"eval_runtime": 70.6095, |
|
"eval_samples_per_second": 6.005, |
|
"eval_steps_per_second": 1.006, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 2.3023023023023026e-05, |
|
"loss": 0.6415, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 2.2522522522522523e-05, |
|
"loss": 0.7402, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"learning_rate": 2.2022022022022024e-05, |
|
"loss": 0.6275, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"learning_rate": 2.1521521521521524e-05, |
|
"loss": 0.5651, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"eval_accuracy": 0.5801886792452831, |
|
"eval_loss": 1.1402446031570435, |
|
"eval_runtime": 70.0342, |
|
"eval_samples_per_second": 6.054, |
|
"eval_steps_per_second": 1.014, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 2.102102102102102e-05, |
|
"loss": 0.5542, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 2.0520520520520522e-05, |
|
"loss": 0.6903, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 2.0020020020020023e-05, |
|
"loss": 0.7, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"learning_rate": 1.951951951951952e-05, |
|
"loss": 0.7182, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"eval_accuracy": 0.5801886792452831, |
|
"eval_loss": 1.097399115562439, |
|
"eval_runtime": 70.689, |
|
"eval_samples_per_second": 5.998, |
|
"eval_steps_per_second": 1.004, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 1.9019019019019018e-05, |
|
"loss": 0.5921, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.5428, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"learning_rate": 1.801801801801802e-05, |
|
"loss": 0.7289, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"learning_rate": 1.7517517517517516e-05, |
|
"loss": 0.6582, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"eval_accuracy": 0.6179245283018868, |
|
"eval_loss": 1.0529001951217651, |
|
"eval_runtime": 70.8949, |
|
"eval_samples_per_second": 5.981, |
|
"eval_steps_per_second": 1.001, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 1.7017017017017017e-05, |
|
"loss": 0.5001, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 20.02, |
|
"learning_rate": 1.6516516516516518e-05, |
|
"loss": 0.5387, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 20.03, |
|
"learning_rate": 1.6016016016016015e-05, |
|
"loss": 0.5709, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 20.03, |
|
"eval_accuracy": 0.6344339622641509, |
|
"eval_loss": 0.965450644493103, |
|
"eval_runtime": 69.2205, |
|
"eval_samples_per_second": 6.125, |
|
"eval_steps_per_second": 1.026, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 1.5515515515515516e-05, |
|
"loss": 0.7102, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 1.5015015015015016e-05, |
|
"loss": 0.4746, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 21.02, |
|
"learning_rate": 1.4514514514514515e-05, |
|
"loss": 0.611, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 21.03, |
|
"learning_rate": 1.4014014014014016e-05, |
|
"loss": 0.4808, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 21.03, |
|
"eval_accuracy": 0.6226415094339622, |
|
"eval_loss": 1.0440747737884521, |
|
"eval_runtime": 70.6133, |
|
"eval_samples_per_second": 6.005, |
|
"eval_steps_per_second": 1.005, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 1.3513513513513515e-05, |
|
"loss": 0.5081, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 1.3013013013013014e-05, |
|
"loss": 0.6717, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 22.02, |
|
"learning_rate": 1.2512512512512515e-05, |
|
"loss": 0.343, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 22.03, |
|
"learning_rate": 1.2012012012012012e-05, |
|
"loss": 0.5816, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 22.03, |
|
"eval_accuracy": 0.6438679245283019, |
|
"eval_loss": 0.9445228576660156, |
|
"eval_runtime": 70.5922, |
|
"eval_samples_per_second": 6.006, |
|
"eval_steps_per_second": 1.006, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 1.1511511511511513e-05, |
|
"loss": 0.4179, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 1.1011011011011012e-05, |
|
"loss": 0.4346, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 23.02, |
|
"learning_rate": 1.051051051051051e-05, |
|
"loss": 0.4038, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 23.03, |
|
"learning_rate": 1.0010010010010011e-05, |
|
"loss": 0.5057, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 23.03, |
|
"eval_accuracy": 0.6320754716981132, |
|
"eval_loss": 1.0248113870620728, |
|
"eval_runtime": 71.1172, |
|
"eval_samples_per_second": 5.962, |
|
"eval_steps_per_second": 0.998, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 9.509509509509509e-06, |
|
"loss": 0.393, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"learning_rate": 9.00900900900901e-06, |
|
"loss": 0.4297, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 24.03, |
|
"learning_rate": 8.508508508508508e-06, |
|
"loss": 0.5053, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 24.03, |
|
"learning_rate": 8.008008008008007e-06, |
|
"loss": 0.6253, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 24.03, |
|
"eval_accuracy": 0.660377358490566, |
|
"eval_loss": 0.9517852067947388, |
|
"eval_runtime": 70.664, |
|
"eval_samples_per_second": 6.0, |
|
"eval_steps_per_second": 1.005, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 7.507507507507508e-06, |
|
"loss": 0.4143, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 25.02, |
|
"learning_rate": 7.007007007007008e-06, |
|
"loss": 0.6003, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 25.03, |
|
"learning_rate": 6.506506506506507e-06, |
|
"loss": 0.6841, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 25.03, |
|
"eval_accuracy": 0.6745283018867925, |
|
"eval_loss": 0.8912516236305237, |
|
"eval_runtime": 70.8493, |
|
"eval_samples_per_second": 5.985, |
|
"eval_steps_per_second": 1.002, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 6.006006006006006e-06, |
|
"loss": 0.2104, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 5.505505505505506e-06, |
|
"loss": 0.6054, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 26.02, |
|
"learning_rate": 5.005005005005006e-06, |
|
"loss": 0.6045, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 26.03, |
|
"learning_rate": 4.504504504504505e-06, |
|
"loss": 0.5933, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 26.03, |
|
"eval_accuracy": 0.6438679245283019, |
|
"eval_loss": 0.9012843370437622, |
|
"eval_runtime": 71.2482, |
|
"eval_samples_per_second": 5.951, |
|
"eval_steps_per_second": 0.997, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 4.004004004004004e-06, |
|
"loss": 0.4052, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 3.503503503503504e-06, |
|
"loss": 0.4422, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"learning_rate": 3.003003003003003e-06, |
|
"loss": 0.4861, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 27.03, |
|
"learning_rate": 2.502502502502503e-06, |
|
"loss": 0.389, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 27.03, |
|
"eval_accuracy": 0.6627358490566038, |
|
"eval_loss": 0.9089723229408264, |
|
"eval_runtime": 71.3879, |
|
"eval_samples_per_second": 5.939, |
|
"eval_steps_per_second": 0.995, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 2.002002002002002e-06, |
|
"loss": 0.4572, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 1.5015015015015015e-06, |
|
"loss": 0.213, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 28.02, |
|
"learning_rate": 1.001001001001001e-06, |
|
"loss": 0.6041, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 28.03, |
|
"learning_rate": 5.005005005005005e-07, |
|
"loss": 0.3705, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 28.03, |
|
"eval_accuracy": 0.6721698113207547, |
|
"eval_loss": 0.8935866355895996, |
|
"eval_runtime": 70.2956, |
|
"eval_samples_per_second": 6.032, |
|
"eval_steps_per_second": 1.01, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 0.0, |
|
"loss": 0.6043, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"eval_accuracy": 0.6721698113207547, |
|
"eval_loss": 0.8942137956619263, |
|
"eval_runtime": 69.8197, |
|
"eval_samples_per_second": 6.073, |
|
"eval_steps_per_second": 1.017, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"step": 1110, |
|
"total_flos": 8.191332105366897e+18, |
|
"train_loss": 1.059841639716346, |
|
"train_runtime": 5977.6934, |
|
"train_samples_per_second": 1.114, |
|
"train_steps_per_second": 0.186 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"eval_accuracy": 0.6745283018867925, |
|
"eval_loss": 0.8912516236305237, |
|
"eval_runtime": 68.8583, |
|
"eval_samples_per_second": 6.158, |
|
"eval_steps_per_second": 1.031, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"eval_accuracy": 0.6745283018867925, |
|
"eval_loss": 0.8912516236305237, |
|
"eval_runtime": 68.7355, |
|
"eval_samples_per_second": 6.169, |
|
"eval_steps_per_second": 1.033, |
|
"step": 1110 |
|
} |
|
], |
|
"max_steps": 1110, |
|
"num_train_epochs": 9223372036854775807, |
|
"total_flos": 8.191332105366897e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|