|
{ |
|
"best_metric": 23.8081, |
|
"best_model_checkpoint": "/content/gdrive/MyDrive/FYP/Models/t5-base-bt2/checkpoint-400", |
|
"epoch": 2.840909090909091, |
|
"eval_steps": 100, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.2713575065135956, |
|
"learning_rate": 2.4318181818181818e-05, |
|
"loss": 0.1676, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_gen_len": 7.3308, |
|
"eval_loss": 0.15983037650585175, |
|
"eval_rouge1": 21.6708, |
|
"eval_rouge2": 9.2098, |
|
"eval_rougeL": 21.4437, |
|
"eval_rougeLsum": 21.3546, |
|
"eval_runtime": 16.7342, |
|
"eval_samples_per_second": 7.948, |
|
"eval_steps_per_second": 1.016, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.2671663463115692, |
|
"learning_rate": 1.8636363636363638e-05, |
|
"loss": 0.1527, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_gen_len": 7.4436, |
|
"eval_loss": 0.15573656558990479, |
|
"eval_rouge1": 22.0171, |
|
"eval_rouge2": 9.3228, |
|
"eval_rougeL": 21.6387, |
|
"eval_rougeLsum": 21.5998, |
|
"eval_runtime": 13.5793, |
|
"eval_samples_per_second": 9.794, |
|
"eval_steps_per_second": 1.252, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.310281366109848, |
|
"learning_rate": 1.2954545454545455e-05, |
|
"loss": 0.1435, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_gen_len": 7.6165, |
|
"eval_loss": 0.153303861618042, |
|
"eval_rouge1": 22.5122, |
|
"eval_rouge2": 10.4875, |
|
"eval_rougeL": 21.8915, |
|
"eval_rougeLsum": 22.0003, |
|
"eval_runtime": 14.3705, |
|
"eval_samples_per_second": 9.255, |
|
"eval_steps_per_second": 1.183, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 0.22745513916015625, |
|
"learning_rate": 7.329545454545455e-06, |
|
"loss": 0.1418, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_gen_len": 7.7744, |
|
"eval_loss": 0.1518535614013672, |
|
"eval_rouge1": 23.8081, |
|
"eval_rouge2": 11.223, |
|
"eval_rougeL": 23.3821, |
|
"eval_rougeLsum": 23.6075, |
|
"eval_runtime": 14.0505, |
|
"eval_samples_per_second": 9.466, |
|
"eval_steps_per_second": 1.21, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 0.27223658561706543, |
|
"learning_rate": 1.6477272727272728e-06, |
|
"loss": 0.1369, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_gen_len": 7.4812, |
|
"eval_loss": 0.1519903540611267, |
|
"eval_rouge1": 22.6861, |
|
"eval_rouge2": 10.8165, |
|
"eval_rougeL": 22.0984, |
|
"eval_rougeLsum": 22.2851, |
|
"eval_runtime": 14.0722, |
|
"eval_samples_per_second": 9.451, |
|
"eval_steps_per_second": 1.208, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 528, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 2434613646458880.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|