|
{ |
|
"best_metric": 0.9870629938253499, |
|
"best_model_checkpoint": "/home3/s5431786/nlp-final-project/results/roberta-base-e-snli-classification-nli_explanation-base/checkpoint-4800", |
|
"epoch": 0.6989748369058714, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.1055900621118013e-06, |
|
"loss": 0.7992, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.9624060150375939, |
|
"eval_f1": 0.9623351677726931, |
|
"eval_loss": 0.1367802619934082, |
|
"eval_runtime": 4.2857, |
|
"eval_samples_per_second": 2296.452, |
|
"eval_steps_per_second": 35.933, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 6.2111801242236025e-06, |
|
"loss": 0.1514, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.975716317821581, |
|
"eval_f1": 0.9756730714212107, |
|
"eval_loss": 0.08741448074579239, |
|
"eval_runtime": 4.5673, |
|
"eval_samples_per_second": 2154.901, |
|
"eval_steps_per_second": 33.718, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.316770186335405e-06, |
|
"loss": 0.121, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.9778500304816095, |
|
"eval_f1": 0.9778119667614332, |
|
"eval_loss": 0.07925494015216827, |
|
"eval_runtime": 4.3138, |
|
"eval_samples_per_second": 2281.508, |
|
"eval_steps_per_second": 35.699, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.87246566383257e-06, |
|
"loss": 0.1088, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.9818126397073765, |
|
"eval_f1": 0.9817742348532473, |
|
"eval_loss": 0.07087873667478561, |
|
"eval_runtime": 4.671, |
|
"eval_samples_per_second": 2107.055, |
|
"eval_steps_per_second": 32.97, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.708960104643559e-06, |
|
"loss": 0.095, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.9833367201788255, |
|
"eval_f1": 0.983300542812346, |
|
"eval_loss": 0.07264702767133713, |
|
"eval_runtime": 4.2709, |
|
"eval_samples_per_second": 2304.409, |
|
"eval_steps_per_second": 36.058, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.545454545454547e-06, |
|
"loss": 0.0888, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.983539930908352, |
|
"eval_f1": 0.9834936652245868, |
|
"eval_loss": 0.06838709861040115, |
|
"eval_runtime": 4.2889, |
|
"eval_samples_per_second": 2294.739, |
|
"eval_steps_per_second": 35.906, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.381948986265533e-06, |
|
"loss": 0.0833, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.9846575899207478, |
|
"eval_f1": 0.984626969048982, |
|
"eval_loss": 0.05881618708372116, |
|
"eval_runtime": 4.3142, |
|
"eval_samples_per_second": 2281.283, |
|
"eval_steps_per_second": 35.696, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.218443427076522e-06, |
|
"loss": 0.0802, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.9832351148140622, |
|
"eval_f1": 0.9831849160413134, |
|
"eval_loss": 0.06230289116501808, |
|
"eval_runtime": 4.4668, |
|
"eval_samples_per_second": 2203.364, |
|
"eval_steps_per_second": 34.477, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.054937867887508e-06, |
|
"loss": 0.0822, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.9861816703921967, |
|
"eval_f1": 0.9861489775586069, |
|
"eval_loss": 0.05556264892220497, |
|
"eval_runtime": 4.7392, |
|
"eval_samples_per_second": 2076.706, |
|
"eval_steps_per_second": 32.495, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.891432308698496e-06, |
|
"loss": 0.0764, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.9852672221093274, |
|
"eval_f1": 0.9852285614441253, |
|
"eval_loss": 0.05288001894950867, |
|
"eval_runtime": 4.2997, |
|
"eval_samples_per_second": 2288.976, |
|
"eval_steps_per_second": 35.816, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.727926749509485e-06, |
|
"loss": 0.0782, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.9860800650274334, |
|
"eval_f1": 0.9860421413180931, |
|
"eval_loss": 0.05745375156402588, |
|
"eval_runtime": 4.289, |
|
"eval_samples_per_second": 2294.686, |
|
"eval_steps_per_second": 35.905, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.564421190320473e-06, |
|
"loss": 0.0722, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.9870961186750661, |
|
"eval_f1": 0.9870629938253499, |
|
"eval_loss": 0.053428150713443756, |
|
"eval_runtime": 4.4266, |
|
"eval_samples_per_second": 2223.361, |
|
"eval_steps_per_second": 34.789, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 8.40091563113146e-06, |
|
"loss": 0.071, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.985876854297907, |
|
"eval_f1": 0.9858488466773426, |
|
"eval_loss": 0.0532599538564682, |
|
"eval_runtime": 4.3538, |
|
"eval_samples_per_second": 2260.534, |
|
"eval_steps_per_second": 35.371, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.237410071942446e-06, |
|
"loss": 0.0723, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.9860800650274334, |
|
"eval_f1": 0.9860406491659012, |
|
"eval_loss": 0.04955599084496498, |
|
"eval_runtime": 4.2895, |
|
"eval_samples_per_second": 2294.439, |
|
"eval_steps_per_second": 35.902, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.073904512753434e-06, |
|
"loss": 0.0713, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.9860800650274334, |
|
"eval_f1": 0.9860488223249062, |
|
"eval_loss": 0.051210932433605194, |
|
"eval_runtime": 4.2586, |
|
"eval_samples_per_second": 2311.113, |
|
"eval_steps_per_second": 36.163, |
|
"step": 6000 |
|
} |
|
], |
|
"max_steps": 25752, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.597164223963392e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|