|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.994241842610364, |
|
"eval_steps": 500, |
|
"global_step": 3120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.9034, |
|
"eval_gen_len": 26.296727272727274, |
|
"eval_loss": 1.8257849216461182, |
|
"eval_precision": 0.9049, |
|
"eval_recall": 0.9023, |
|
"eval_rouge1": 0.4338, |
|
"eval_rouge2": 0.1906, |
|
"eval_rougeL": 0.3496, |
|
"eval_rougeLsum": 0.3498, |
|
"eval_runtime": 513.5292, |
|
"eval_samples_per_second": 5.355, |
|
"eval_steps_per_second": 0.335, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.3589743589743592e-05, |
|
"loss": 2.1621, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.9054, |
|
"eval_gen_len": 26.272727272727273, |
|
"eval_loss": 1.7537195682525635, |
|
"eval_precision": 0.9068, |
|
"eval_recall": 0.9044, |
|
"eval_rouge1": 0.4449, |
|
"eval_rouge2": 0.2005, |
|
"eval_rougeL": 0.3633, |
|
"eval_rougeLsum": 0.3633, |
|
"eval_runtime": 505.2109, |
|
"eval_samples_per_second": 5.443, |
|
"eval_steps_per_second": 0.34, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.17948717948718e-06, |
|
"loss": 1.8794, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.9066, |
|
"eval_gen_len": 26.434545454545454, |
|
"eval_loss": 1.726783275604248, |
|
"eval_precision": 0.9078, |
|
"eval_recall": 0.9058, |
|
"eval_rouge1": 0.4518, |
|
"eval_rouge2": 0.2061, |
|
"eval_rougeL": 0.3696, |
|
"eval_rougeLsum": 0.3695, |
|
"eval_runtime": 507.6007, |
|
"eval_samples_per_second": 5.418, |
|
"eval_steps_per_second": 0.339, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 7.692307692307694e-07, |
|
"loss": 1.8271, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.9069, |
|
"eval_gen_len": 26.39709090909091, |
|
"eval_loss": 1.7157036066055298, |
|
"eval_precision": 0.9082, |
|
"eval_recall": 0.906, |
|
"eval_rouge1": 0.4539, |
|
"eval_rouge2": 0.2075, |
|
"eval_rougeL": 0.3716, |
|
"eval_rougeLsum": 0.3714, |
|
"eval_runtime": 505.7397, |
|
"eval_samples_per_second": 5.438, |
|
"eval_steps_per_second": 0.34, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.9074, |
|
"eval_gen_len": 26.301454545454547, |
|
"eval_loss": 1.703265905380249, |
|
"eval_precision": 0.9087, |
|
"eval_recall": 0.9065, |
|
"eval_rouge1": 0.4561, |
|
"eval_rouge2": 0.2098, |
|
"eval_rougeL": 0.3735, |
|
"eval_rougeLsum": 0.3734, |
|
"eval_runtime": 505.7439, |
|
"eval_samples_per_second": 5.438, |
|
"eval_steps_per_second": 0.34, |
|
"step": 1951 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 2.9059829059829063e-06, |
|
"loss": 1.8067, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.9077, |
|
"eval_gen_len": 26.438909090909092, |
|
"eval_loss": 1.6896997690200806, |
|
"eval_precision": 0.9089, |
|
"eval_recall": 0.9069, |
|
"eval_rouge1": 0.4592, |
|
"eval_rouge2": 0.2114, |
|
"eval_rougeL": 0.3762, |
|
"eval_rougeLsum": 0.3759, |
|
"eval_runtime": 506.0324, |
|
"eval_samples_per_second": 5.434, |
|
"eval_steps_per_second": 0.34, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 3.974358974358974e-06, |
|
"loss": 1.7833, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.9079, |
|
"eval_gen_len": 26.374545454545455, |
|
"eval_loss": 1.6819266080856323, |
|
"eval_precision": 0.9092, |
|
"eval_recall": 0.9071, |
|
"eval_rouge1": 0.4598, |
|
"eval_rouge2": 0.2115, |
|
"eval_rougeL": 0.3764, |
|
"eval_rougeLsum": 0.376, |
|
"eval_runtime": 506.9661, |
|
"eval_samples_per_second": 5.424, |
|
"eval_steps_per_second": 0.339, |
|
"step": 2731 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 7.692307692307694e-07, |
|
"loss": 1.7683, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_f1": 0.9081, |
|
"eval_gen_len": 26.465454545454545, |
|
"eval_loss": 1.6796071529388428, |
|
"eval_precision": 0.9092, |
|
"eval_recall": 0.9073, |
|
"eval_rouge1": 0.4613, |
|
"eval_rouge2": 0.2127, |
|
"eval_rougeL": 0.3775, |
|
"eval_rougeLsum": 0.3772, |
|
"eval_runtime": 504.4764, |
|
"eval_samples_per_second": 5.451, |
|
"eval_steps_per_second": 0.341, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"step": 3120, |
|
"total_flos": 5.768064442218578e+17, |
|
"train_loss": 0.637062493348733, |
|
"train_runtime": 6322.7563, |
|
"train_samples_per_second": 63.264, |
|
"train_steps_per_second": 0.493 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3120, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"total_flos": 5.768064442218578e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|