k4black's picture
Training in progress, step 6000
650d0fc
{
"best_metric": 0.9870629938253499,
"best_model_checkpoint": "/home3/s5431786/nlp-final-project/results/roberta-base-e-snli-classification-nli_explanation-base/checkpoint-4800",
"epoch": 0.6989748369058714,
"global_step": 6000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 3.1055900621118013e-06,
"loss": 0.7992,
"step": 400
},
{
"epoch": 0.05,
"eval_accuracy": 0.9624060150375939,
"eval_f1": 0.9623351677726931,
"eval_loss": 0.1367802619934082,
"eval_runtime": 4.2857,
"eval_samples_per_second": 2296.452,
"eval_steps_per_second": 35.933,
"step": 400
},
{
"epoch": 0.09,
"learning_rate": 6.2111801242236025e-06,
"loss": 0.1514,
"step": 800
},
{
"epoch": 0.09,
"eval_accuracy": 0.975716317821581,
"eval_f1": 0.9756730714212107,
"eval_loss": 0.08741448074579239,
"eval_runtime": 4.5673,
"eval_samples_per_second": 2154.901,
"eval_steps_per_second": 33.718,
"step": 800
},
{
"epoch": 0.14,
"learning_rate": 9.316770186335405e-06,
"loss": 0.121,
"step": 1200
},
{
"epoch": 0.14,
"eval_accuracy": 0.9778500304816095,
"eval_f1": 0.9778119667614332,
"eval_loss": 0.07925494015216827,
"eval_runtime": 4.3138,
"eval_samples_per_second": 2281.508,
"eval_steps_per_second": 35.699,
"step": 1200
},
{
"epoch": 0.19,
"learning_rate": 9.87246566383257e-06,
"loss": 0.1088,
"step": 1600
},
{
"epoch": 0.19,
"eval_accuracy": 0.9818126397073765,
"eval_f1": 0.9817742348532473,
"eval_loss": 0.07087873667478561,
"eval_runtime": 4.671,
"eval_samples_per_second": 2107.055,
"eval_steps_per_second": 32.97,
"step": 1600
},
{
"epoch": 0.23,
"learning_rate": 9.708960104643559e-06,
"loss": 0.095,
"step": 2000
},
{
"epoch": 0.23,
"eval_accuracy": 0.9833367201788255,
"eval_f1": 0.983300542812346,
"eval_loss": 0.07264702767133713,
"eval_runtime": 4.2709,
"eval_samples_per_second": 2304.409,
"eval_steps_per_second": 36.058,
"step": 2000
},
{
"epoch": 0.28,
"learning_rate": 9.545454545454547e-06,
"loss": 0.0888,
"step": 2400
},
{
"epoch": 0.28,
"eval_accuracy": 0.983539930908352,
"eval_f1": 0.9834936652245868,
"eval_loss": 0.06838709861040115,
"eval_runtime": 4.2889,
"eval_samples_per_second": 2294.739,
"eval_steps_per_second": 35.906,
"step": 2400
},
{
"epoch": 0.33,
"learning_rate": 9.381948986265533e-06,
"loss": 0.0833,
"step": 2800
},
{
"epoch": 0.33,
"eval_accuracy": 0.9846575899207478,
"eval_f1": 0.984626969048982,
"eval_loss": 0.05881618708372116,
"eval_runtime": 4.3142,
"eval_samples_per_second": 2281.283,
"eval_steps_per_second": 35.696,
"step": 2800
},
{
"epoch": 0.37,
"learning_rate": 9.218443427076522e-06,
"loss": 0.0802,
"step": 3200
},
{
"epoch": 0.37,
"eval_accuracy": 0.9832351148140622,
"eval_f1": 0.9831849160413134,
"eval_loss": 0.06230289116501808,
"eval_runtime": 4.4668,
"eval_samples_per_second": 2203.364,
"eval_steps_per_second": 34.477,
"step": 3200
},
{
"epoch": 0.42,
"learning_rate": 9.054937867887508e-06,
"loss": 0.0822,
"step": 3600
},
{
"epoch": 0.42,
"eval_accuracy": 0.9861816703921967,
"eval_f1": 0.9861489775586069,
"eval_loss": 0.05556264892220497,
"eval_runtime": 4.7392,
"eval_samples_per_second": 2076.706,
"eval_steps_per_second": 32.495,
"step": 3600
},
{
"epoch": 0.47,
"learning_rate": 8.891432308698496e-06,
"loss": 0.0764,
"step": 4000
},
{
"epoch": 0.47,
"eval_accuracy": 0.9852672221093274,
"eval_f1": 0.9852285614441253,
"eval_loss": 0.05288001894950867,
"eval_runtime": 4.2997,
"eval_samples_per_second": 2288.976,
"eval_steps_per_second": 35.816,
"step": 4000
},
{
"epoch": 0.51,
"learning_rate": 8.727926749509485e-06,
"loss": 0.0782,
"step": 4400
},
{
"epoch": 0.51,
"eval_accuracy": 0.9860800650274334,
"eval_f1": 0.9860421413180931,
"eval_loss": 0.05745375156402588,
"eval_runtime": 4.289,
"eval_samples_per_second": 2294.686,
"eval_steps_per_second": 35.905,
"step": 4400
},
{
"epoch": 0.56,
"learning_rate": 8.564421190320473e-06,
"loss": 0.0722,
"step": 4800
},
{
"epoch": 0.56,
"eval_accuracy": 0.9870961186750661,
"eval_f1": 0.9870629938253499,
"eval_loss": 0.053428150713443756,
"eval_runtime": 4.4266,
"eval_samples_per_second": 2223.361,
"eval_steps_per_second": 34.789,
"step": 4800
},
{
"epoch": 0.61,
"learning_rate": 8.40091563113146e-06,
"loss": 0.071,
"step": 5200
},
{
"epoch": 0.61,
"eval_accuracy": 0.985876854297907,
"eval_f1": 0.9858488466773426,
"eval_loss": 0.0532599538564682,
"eval_runtime": 4.3538,
"eval_samples_per_second": 2260.534,
"eval_steps_per_second": 35.371,
"step": 5200
},
{
"epoch": 0.65,
"learning_rate": 8.237410071942446e-06,
"loss": 0.0723,
"step": 5600
},
{
"epoch": 0.65,
"eval_accuracy": 0.9860800650274334,
"eval_f1": 0.9860406491659012,
"eval_loss": 0.04955599084496498,
"eval_runtime": 4.2895,
"eval_samples_per_second": 2294.439,
"eval_steps_per_second": 35.902,
"step": 5600
},
{
"epoch": 0.7,
"learning_rate": 8.073904512753434e-06,
"loss": 0.0713,
"step": 6000
},
{
"epoch": 0.7,
"eval_accuracy": 0.9860800650274334,
"eval_f1": 0.9860488223249062,
"eval_loss": 0.051210932433605194,
"eval_runtime": 4.2586,
"eval_samples_per_second": 2311.113,
"eval_steps_per_second": 36.163,
"step": 6000
}
],
"max_steps": 25752,
"num_train_epochs": 3,
"total_flos": 1.597164223963392e+16,
"trial_name": null,
"trial_params": null
}