longt5_xl_summ_screen_bp_only_30 / trainer_state.json
learn3r's picture
End of training
ddf30ed
raw
history blame
7.07 kB
{
"best_metric": 2.237640380859375,
"best_model_checkpoint": "/exports/eddie/scratch/s1970716/models/summarization/longt5_xl_summ_screen_bp_only_30/checkpoint-14",
"epoch": 13.182608695652174,
"eval_steps": 500,
"global_step": 45,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14,
"learning_rate": 0.0005,
"loss": 0.3076,
"step": 2
},
{
"epoch": 0.28,
"learning_rate": 0.0005,
"loss": 0.3217,
"step": 4
},
{
"epoch": 0.42,
"learning_rate": 0.0005,
"loss": 0.3519,
"step": 6
},
{
"epoch": 0.56,
"learning_rate": 0.0005,
"loss": 0.3449,
"step": 8
},
{
"epoch": 0.7,
"learning_rate": 0.0005,
"loss": 0.3197,
"step": 10
},
{
"epoch": 0.83,
"learning_rate": 0.0005,
"loss": 0.3342,
"step": 12
},
{
"epoch": 0.97,
"learning_rate": 0.0005,
"loss": 0.324,
"step": 14
},
{
"epoch": 0.97,
"eval_gen_len": 246.7396449704142,
"eval_loss": 2.237640380859375,
"eval_rouge1": 40.4388,
"eval_rouge2": 16.4662,
"eval_rougeL": 28.0771,
"eval_rougeLsum": 38.3405,
"eval_runtime": 1741.7742,
"eval_samples_per_second": 0.194,
"eval_steps_per_second": 0.025,
"step": 14
},
{
"epoch": 4.83,
"eval_gen_len": 503.56508875739644,
"eval_loss": 2.572709798812866,
"eval_rouge1": 30.0123,
"eval_rouge2": 12.3701,
"eval_rougeL": 21.2834,
"eval_rougeLsum": 28.891,
"eval_runtime": 472.1582,
"eval_samples_per_second": 0.716,
"eval_steps_per_second": 0.023,
"step": 15
},
{
"epoch": 5.11,
"learning_rate": 0.0005,
"loss": 0.278,
"step": 16
},
{
"epoch": 5.67,
"learning_rate": 0.0005,
"loss": 0.3036,
"step": 18
},
{
"epoch": 5.95,
"eval_gen_len": 506.9526627218935,
"eval_loss": 2.2658941745758057,
"eval_rouge1": 27.8421,
"eval_rouge2": 11.1942,
"eval_rougeL": 20.4713,
"eval_rougeLsum": 26.6097,
"eval_runtime": 472.9998,
"eval_samples_per_second": 0.715,
"eval_steps_per_second": 0.023,
"step": 19
},
{
"epoch": 6.23,
"learning_rate": 0.0005,
"loss": 0.2937,
"step": 20
},
{
"epoch": 6.78,
"learning_rate": 0.0005,
"loss": 0.2941,
"step": 22
},
{
"epoch": 6.78,
"eval_gen_len": 284.2307692307692,
"eval_loss": 2.2636122703552246,
"eval_rouge1": 40.8304,
"eval_rouge2": 17.3615,
"eval_rougeL": 28.0971,
"eval_rougeLsum": 39.0943,
"eval_runtime": 468.1667,
"eval_samples_per_second": 0.722,
"eval_steps_per_second": 0.023,
"step": 22
},
{
"epoch": 7.34,
"learning_rate": 0.0005,
"loss": 0.2508,
"step": 24
},
{
"epoch": 7.9,
"learning_rate": 0.0005,
"loss": 0.2642,
"step": 26
},
{
"epoch": 7.9,
"eval_gen_len": 341.25147928994085,
"eval_loss": 2.286357879638672,
"eval_rouge1": 38.3377,
"eval_rouge2": 15.8119,
"eval_rougeL": 26.4838,
"eval_rougeLsum": 36.5174,
"eval_runtime": 469.1516,
"eval_samples_per_second": 0.72,
"eval_steps_per_second": 0.023,
"step": 26
},
{
"epoch": 8.45,
"learning_rate": 0.0005,
"loss": 0.2604,
"step": 28
},
{
"epoch": 8.73,
"eval_gen_len": 435.26331360946745,
"eval_loss": 2.455064535140991,
"eval_rouge1": 33.2021,
"eval_rouge2": 13.6577,
"eval_rougeL": 23.3288,
"eval_rougeLsum": 31.8326,
"eval_runtime": 471.959,
"eval_samples_per_second": 0.716,
"eval_steps_per_second": 0.023,
"step": 29
},
{
"epoch": 9.01,
"learning_rate": 0.0005,
"loss": 0.2422,
"step": 30
},
{
"epoch": 9.57,
"learning_rate": 0.0005,
"loss": 0.2237,
"step": 32
},
{
"epoch": 9.84,
"eval_gen_len": 234.61242603550295,
"eval_loss": 2.615253210067749,
"eval_rouge1": 40.3297,
"eval_rouge2": 15.3786,
"eval_rougeL": 28.1208,
"eval_rougeLsum": 38.2426,
"eval_runtime": 470.1581,
"eval_samples_per_second": 0.719,
"eval_steps_per_second": 0.023,
"step": 33
},
{
"epoch": 10.12,
"learning_rate": 0.0005,
"loss": 0.2184,
"step": 34
},
{
"epoch": 10.68,
"learning_rate": 0.0005,
"loss": 0.1904,
"step": 36
},
{
"epoch": 10.96,
"eval_gen_len": 174.57396449704143,
"eval_loss": 2.666517734527588,
"eval_rouge1": 39.6006,
"eval_rouge2": 14.9586,
"eval_rougeL": 27.2453,
"eval_rougeLsum": 37.6744,
"eval_runtime": 450.9467,
"eval_samples_per_second": 0.75,
"eval_steps_per_second": 0.024,
"step": 37
},
{
"epoch": 11.23,
"learning_rate": 0.0005,
"loss": 0.2076,
"step": 38
},
{
"epoch": 11.79,
"learning_rate": 0.0005,
"loss": 0.2247,
"step": 40
},
{
"epoch": 11.79,
"eval_gen_len": 500.58284023668637,
"eval_loss": 2.722362518310547,
"eval_rouge1": 30.5957,
"eval_rouge2": 13.3496,
"eval_rougeL": 21.9712,
"eval_rougeLsum": 29.22,
"eval_runtime": 477.6586,
"eval_samples_per_second": 0.708,
"eval_steps_per_second": 0.023,
"step": 40
},
{
"epoch": 12.35,
"learning_rate": 0.0005,
"loss": 0.1866,
"step": 42
},
{
"epoch": 12.9,
"learning_rate": 0.0005,
"loss": 0.182,
"step": 44
},
{
"epoch": 12.9,
"eval_gen_len": 259.6568047337278,
"eval_loss": 3.271503448486328,
"eval_rouge1": 41.6828,
"eval_rouge2": 17.0818,
"eval_rougeL": 28.087,
"eval_rougeLsum": 39.5947,
"eval_runtime": 468.2637,
"eval_samples_per_second": 0.722,
"eval_steps_per_second": 0.023,
"step": 44
},
{
"epoch": 13.18,
"eval_gen_len": 484.396449704142,
"eval_loss": 2.397312641143799,
"eval_rouge1": 31.9833,
"eval_rouge2": 14.0141,
"eval_rougeL": 22.6823,
"eval_rougeLsum": 30.6424,
"eval_runtime": 471.2439,
"eval_samples_per_second": 0.717,
"eval_steps_per_second": 0.023,
"step": 45
},
{
"epoch": 13.18,
"step": 45,
"total_flos": 2.4232217682745754e+18,
"train_loss": 0.16495737830797833,
"train_runtime": 12728.0808,
"train_samples_per_second": 4.329,
"train_steps_per_second": 0.004
}
],
"logging_steps": 2,
"max_steps": 45,
"num_train_epochs": 15,
"save_steps": 500,
"total_flos": 2.4232217682745754e+18,
"trial_name": null,
"trial_params": null
}