|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 6260, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.920127795527157e-05, |
|
"loss": 0.7778, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.840255591054313e-05, |
|
"loss": 0.5964, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.76038338658147e-05, |
|
"loss": 0.5901, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.680511182108626e-05, |
|
"loss": 0.4494, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.600638977635783e-05, |
|
"loss": 0.4166, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.520766773162939e-05, |
|
"loss": 0.4516, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8979708054137165, |
|
"eval_f1": 0.44450345193839613, |
|
"eval_loss": 0.40465056896209717, |
|
"eval_precision": 0.4332298136645963, |
|
"eval_recall": 0.4563794983642312, |
|
"eval_runtime": 68.5329, |
|
"eval_samples_per_second": 146.017, |
|
"eval_steps_per_second": 36.508, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.440894568690096e-05, |
|
"loss": 0.4155, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.361022364217253e-05, |
|
"loss": 0.3645, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.2811501597444096e-05, |
|
"loss": 0.3712, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.201277955271566e-05, |
|
"loss": 0.3669, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.1214057507987225e-05, |
|
"loss": 0.3424, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 4.041533546325879e-05, |
|
"loss": 0.3677, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9192688065906114, |
|
"eval_f1": 0.5293376983127676, |
|
"eval_loss": 0.2773844301700592, |
|
"eval_precision": 0.4918109499298081, |
|
"eval_recall": 0.5730643402399127, |
|
"eval_runtime": 67.6554, |
|
"eval_samples_per_second": 147.911, |
|
"eval_steps_per_second": 36.982, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.9616613418530355e-05, |
|
"loss": 0.3039, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.8817891373801916e-05, |
|
"loss": 0.2599, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.8019169329073485e-05, |
|
"loss": 0.3243, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.722044728434505e-05, |
|
"loss": 0.2701, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 3.6421725239616614e-05, |
|
"loss": 0.2634, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 3.562300319488818e-05, |
|
"loss": 0.2892, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9383648753820163, |
|
"eval_f1": 0.6352631578947368, |
|
"eval_loss": 0.21329015493392944, |
|
"eval_precision": 0.6139369277721262, |
|
"eval_recall": 0.6581243184296619, |
|
"eval_runtime": 67.955, |
|
"eval_samples_per_second": 147.259, |
|
"eval_steps_per_second": 36.818, |
|
"step": 1878 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.482428115015975e-05, |
|
"loss": 0.315, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 3.402555910543131e-05, |
|
"loss": 0.242, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.322683706070287e-05, |
|
"loss": 0.2366, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 3.242811501597444e-05, |
|
"loss": 0.2233, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 3.162939297124601e-05, |
|
"loss": 0.233, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 3.083067092651757e-05, |
|
"loss": 0.2469, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 3.003194888178914e-05, |
|
"loss": 0.2736, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9488050720373569, |
|
"eval_f1": 0.6536661466458659, |
|
"eval_loss": 0.17724330723285675, |
|
"eval_precision": 0.6247514910536779, |
|
"eval_recall": 0.6853871319520175, |
|
"eval_runtime": 68.1206, |
|
"eval_samples_per_second": 146.901, |
|
"eval_steps_per_second": 36.729, |
|
"step": 2504 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 2.9233226837060707e-05, |
|
"loss": 0.1691, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 2.843450479233227e-05, |
|
"loss": 0.1941, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 2.7635782747603834e-05, |
|
"loss": 0.1891, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 2.68370607028754e-05, |
|
"loss": 0.2037, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 2.6038338658146967e-05, |
|
"loss": 0.2222, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 2.523961661341853e-05, |
|
"loss": 0.221, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9560372809931474, |
|
"eval_f1": 0.6772486772486773, |
|
"eval_loss": 0.15026314556598663, |
|
"eval_precision": 0.6295081967213115, |
|
"eval_recall": 0.732824427480916, |
|
"eval_runtime": 68.8869, |
|
"eval_samples_per_second": 145.267, |
|
"eval_steps_per_second": 36.32, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 2.44408945686901e-05, |
|
"loss": 0.1524, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 2.364217252396166e-05, |
|
"loss": 0.1575, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 2.284345047923323e-05, |
|
"loss": 0.1606, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 2.2044728434504794e-05, |
|
"loss": 0.1314, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 2.124600638977636e-05, |
|
"loss": 0.1845, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 2.0447284345047924e-05, |
|
"loss": 0.1569, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9622824168106149, |
|
"eval_f1": 0.7409068261086198, |
|
"eval_loss": 0.1283087134361267, |
|
"eval_precision": 0.6821100917431193, |
|
"eval_recall": 0.8107960741548528, |
|
"eval_runtime": 69.0068, |
|
"eval_samples_per_second": 145.015, |
|
"eval_steps_per_second": 36.257, |
|
"step": 3756 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.964856230031949e-05, |
|
"loss": 0.1495, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 1.8849840255591057e-05, |
|
"loss": 0.1309, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 1.805111821086262e-05, |
|
"loss": 0.131, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 1.7252396166134186e-05, |
|
"loss": 0.1177, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 1.645367412140575e-05, |
|
"loss": 0.1046, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 1.565495207667732e-05, |
|
"loss": 0.1534, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9707674493650462, |
|
"eval_f1": 0.7749154306531356, |
|
"eval_loss": 0.09951327741146088, |
|
"eval_precision": 0.7411647585863613, |
|
"eval_recall": 0.811886586695747, |
|
"eval_runtime": 67.5487, |
|
"eval_samples_per_second": 148.145, |
|
"eval_steps_per_second": 37.04, |
|
"step": 4382 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.485623003194888e-05, |
|
"loss": 0.1147, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 1.4057507987220447e-05, |
|
"loss": 0.1158, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 1.3258785942492014e-05, |
|
"loss": 0.0993, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 1.2460063897763578e-05, |
|
"loss": 0.1288, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 1.1661341853035145e-05, |
|
"loss": 0.0874, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 1.086261980830671e-05, |
|
"loss": 0.105, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 1.0063897763578276e-05, |
|
"loss": 0.089, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9760065298684535, |
|
"eval_f1": 0.8010457516339871, |
|
"eval_loss": 0.08459383249282837, |
|
"eval_precision": 0.7694625816172778, |
|
"eval_recall": 0.8353326063249727, |
|
"eval_runtime": 67.5952, |
|
"eval_samples_per_second": 148.043, |
|
"eval_steps_per_second": 37.014, |
|
"step": 5008 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 9.265175718849841e-06, |
|
"loss": 0.0766, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 8.466453674121406e-06, |
|
"loss": 0.0929, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 7.66773162939297e-06, |
|
"loss": 0.089, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 6.869009584664538e-06, |
|
"loss": 0.0946, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 6.070287539936103e-06, |
|
"loss": 0.0757, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 5.2715654952076674e-06, |
|
"loss": 0.0923, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9789108027562119, |
|
"eval_f1": 0.828852119958635, |
|
"eval_loss": 0.07430661469697952, |
|
"eval_precision": 0.788102261553589, |
|
"eval_recall": 0.8740458015267175, |
|
"eval_runtime": 67.8851, |
|
"eval_samples_per_second": 147.411, |
|
"eval_steps_per_second": 36.856, |
|
"step": 5634 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 4.472843450479233e-06, |
|
"loss": 0.0578, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 3.6741214057507987e-06, |
|
"loss": 0.0664, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 2.8753993610223644e-06, |
|
"loss": 0.0616, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 2.0766773162939296e-06, |
|
"loss": 0.0759, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 1.2779552715654952e-06, |
|
"loss": 0.0769, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 4.792332268370607e-07, |
|
"loss": 0.0711, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9819289686983922, |
|
"eval_f1": 0.8444211629125196, |
|
"eval_loss": 0.06683139503002167, |
|
"eval_precision": 0.8125, |
|
"eval_recall": 0.8789531079607416, |
|
"eval_runtime": 67.6072, |
|
"eval_samples_per_second": 148.017, |
|
"eval_steps_per_second": 37.008, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 6260, |
|
"total_flos": 1.307859275810304e+16, |
|
"train_loss": 0.217802461039144, |
|
"train_runtime": 2284.8138, |
|
"train_samples_per_second": 43.798, |
|
"train_steps_per_second": 2.74 |
|
} |
|
], |
|
"max_steps": 6260, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.307859275810304e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|