bart-large-cnn / last-checkpoint /trainer_state.json
booksouls's picture
Training in progress, epoch 8, checkpoint
4d1cec8 verified
raw
history blame
5.34 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.989690721649485,
"eval_steps": 500,
"global_step": 567,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9896907216494846,
"grad_norm": 2.3543875217437744,
"learning_rate": 9.5e-06,
"loss": 3.0813,
"step": 63
},
{
"epoch": 0.9896907216494846,
"eval_gen_len": 247.969023,
"eval_loss": 2.5624194145202637,
"eval_rouge1": 0.365572,
"eval_rouge2": 0.067814,
"eval_rougeL": 0.174291,
"eval_rougeLsum": 0.338231,
"eval_runtime": 3587.8888,
"eval_samples_per_second": 0.351,
"eval_steps_per_second": 0.088,
"step": 63
},
{
"epoch": 1.9896907216494846,
"grad_norm": 1.331000566482544,
"learning_rate": 9e-06,
"loss": 2.6117,
"step": 126
},
{
"epoch": 1.9896907216494846,
"eval_gen_len": 254.757744,
"eval_loss": 2.461825132369995,
"eval_rouge1": 0.378265,
"eval_rouge2": 0.077138,
"eval_rougeL": 0.182503,
"eval_rougeLsum": 0.351427,
"eval_runtime": 3653.2761,
"eval_samples_per_second": 0.345,
"eval_steps_per_second": 0.086,
"step": 126
},
{
"epoch": 2.9896907216494846,
"grad_norm": 0.8951466083526611,
"learning_rate": 8.5e-06,
"loss": 2.5014,
"step": 189
},
{
"epoch": 2.9896907216494846,
"eval_gen_len": 254.992851,
"eval_loss": 2.4188287258148193,
"eval_rouge1": 0.374918,
"eval_rouge2": 0.074194,
"eval_rougeL": 0.179174,
"eval_rougeLsum": 0.347582,
"eval_runtime": 3488.9943,
"eval_samples_per_second": 0.361,
"eval_steps_per_second": 0.09,
"step": 189
},
{
"epoch": 3.9896907216494846,
"grad_norm": 0.8497774004936218,
"learning_rate": 8.000000000000001e-06,
"loss": 2.4322,
"step": 252
},
{
"epoch": 3.9896907216494846,
"eval_gen_len": 254.067514,
"eval_loss": 2.3958144187927246,
"eval_rouge1": 0.380259,
"eval_rouge2": 0.078095,
"eval_rougeL": 0.183458,
"eval_rougeLsum": 0.353376,
"eval_runtime": 3495.9619,
"eval_samples_per_second": 0.36,
"eval_steps_per_second": 0.09,
"step": 252
},
{
"epoch": 4.989690721649485,
"grad_norm": 0.8451509475708008,
"learning_rate": 7.500000000000001e-06,
"loss": 2.3836,
"step": 315
},
{
"epoch": 4.989690721649485,
"eval_gen_len": 254.285147,
"eval_loss": 2.3810665607452393,
"eval_rouge1": 0.382543,
"eval_rouge2": 0.079173,
"eval_rougeL": 0.183712,
"eval_rougeLsum": 0.355298,
"eval_runtime": 3613.6748,
"eval_samples_per_second": 0.348,
"eval_steps_per_second": 0.087,
"step": 315
},
{
"epoch": 5.989690721649485,
"grad_norm": 0.8150149583816528,
"learning_rate": 7e-06,
"loss": 2.3465,
"step": 378
},
{
"epoch": 5.989690721649485,
"eval_gen_len": 254.706116,
"eval_loss": 2.3701136112213135,
"eval_rouge1": 0.386206,
"eval_rouge2": 0.081617,
"eval_rougeL": 0.186495,
"eval_rougeLsum": 0.359838,
"eval_runtime": 3613.0416,
"eval_samples_per_second": 0.348,
"eval_steps_per_second": 0.087,
"step": 378
},
{
"epoch": 6.989690721649485,
"grad_norm": 0.8424471020698547,
"learning_rate": 6.5000000000000004e-06,
"loss": 2.3149,
"step": 441
},
{
"epoch": 6.989690721649485,
"eval_gen_len": 254.68467,
"eval_loss": 2.3630220890045166,
"eval_rouge1": 0.388393,
"eval_rouge2": 0.083801,
"eval_rougeL": 0.187039,
"eval_rougeLsum": 0.361264,
"eval_runtime": 3555.8921,
"eval_samples_per_second": 0.354,
"eval_steps_per_second": 0.089,
"step": 441
},
{
"epoch": 7.989690721649485,
"grad_norm": 0.8140623569488525,
"learning_rate": 6e-06,
"loss": 2.2876,
"step": 504
},
{
"epoch": 7.989690721649485,
"eval_gen_len": 255.17077,
"eval_loss": 2.356766939163208,
"eval_rouge1": 0.385187,
"eval_rouge2": 0.081047,
"eval_rougeL": 0.185354,
"eval_rougeLsum": 0.357705,
"eval_runtime": 3499.45,
"eval_samples_per_second": 0.36,
"eval_steps_per_second": 0.09,
"step": 504
},
{
"epoch": 8.989690721649485,
"grad_norm": 0.846682071685791,
"learning_rate": 5.500000000000001e-06,
"loss": 2.2639,
"step": 567
},
{
"epoch": 8.989690721649485,
"eval_gen_len": 254.744241,
"eval_loss": 2.3528521060943604,
"eval_rouge1": 0.389675,
"eval_rouge2": 0.083846,
"eval_rougeL": 0.187545,
"eval_rougeLsum": 0.361809,
"eval_runtime": 3507.2655,
"eval_samples_per_second": 0.359,
"eval_steps_per_second": 0.09,
"step": 567
}
],
"logging_steps": 500,
"max_steps": 1260,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 1.5885960285782016e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}