|
{ |
|
"best_metric": 2.237640380859375, |
|
"best_model_checkpoint": "/exports/eddie/scratch/s1970716/models/summarization/longt5_xl_summ_screen_bp_only_30/checkpoint-14", |
|
"epoch": 13.182608695652174, |
|
"eval_steps": 500, |
|
"global_step": 45, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3076, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3217, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3519, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3449, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3197, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3342, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0005, |
|
"loss": 0.324, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_gen_len": 246.7396449704142, |
|
"eval_loss": 2.237640380859375, |
|
"eval_rouge1": 40.4388, |
|
"eval_rouge2": 16.4662, |
|
"eval_rougeL": 28.0771, |
|
"eval_rougeLsum": 38.3405, |
|
"eval_runtime": 1741.7742, |
|
"eval_samples_per_second": 0.194, |
|
"eval_steps_per_second": 0.025, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_gen_len": 503.56508875739644, |
|
"eval_loss": 2.572709798812866, |
|
"eval_rouge1": 30.0123, |
|
"eval_rouge2": 12.3701, |
|
"eval_rougeL": 21.2834, |
|
"eval_rougeLsum": 28.891, |
|
"eval_runtime": 472.1582, |
|
"eval_samples_per_second": 0.716, |
|
"eval_steps_per_second": 0.023, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 0.0005, |
|
"loss": 0.278, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3036, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"eval_gen_len": 506.9526627218935, |
|
"eval_loss": 2.2658941745758057, |
|
"eval_rouge1": 27.8421, |
|
"eval_rouge2": 11.1942, |
|
"eval_rougeL": 20.4713, |
|
"eval_rougeLsum": 26.6097, |
|
"eval_runtime": 472.9998, |
|
"eval_samples_per_second": 0.715, |
|
"eval_steps_per_second": 0.023, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2937, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2941, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"eval_gen_len": 284.2307692307692, |
|
"eval_loss": 2.2636122703552246, |
|
"eval_rouge1": 40.8304, |
|
"eval_rouge2": 17.3615, |
|
"eval_rougeL": 28.0971, |
|
"eval_rougeLsum": 39.0943, |
|
"eval_runtime": 468.1667, |
|
"eval_samples_per_second": 0.722, |
|
"eval_steps_per_second": 0.023, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2508, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2642, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"eval_gen_len": 341.25147928994085, |
|
"eval_loss": 2.286357879638672, |
|
"eval_rouge1": 38.3377, |
|
"eval_rouge2": 15.8119, |
|
"eval_rougeL": 26.4838, |
|
"eval_rougeLsum": 36.5174, |
|
"eval_runtime": 469.1516, |
|
"eval_samples_per_second": 0.72, |
|
"eval_steps_per_second": 0.023, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2604, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"eval_gen_len": 435.26331360946745, |
|
"eval_loss": 2.455064535140991, |
|
"eval_rouge1": 33.2021, |
|
"eval_rouge2": 13.6577, |
|
"eval_rougeL": 23.3288, |
|
"eval_rougeLsum": 31.8326, |
|
"eval_runtime": 471.959, |
|
"eval_samples_per_second": 0.716, |
|
"eval_steps_per_second": 0.023, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2422, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2237, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"eval_gen_len": 234.61242603550295, |
|
"eval_loss": 2.615253210067749, |
|
"eval_rouge1": 40.3297, |
|
"eval_rouge2": 15.3786, |
|
"eval_rougeL": 28.1208, |
|
"eval_rougeLsum": 38.2426, |
|
"eval_runtime": 470.1581, |
|
"eval_samples_per_second": 0.719, |
|
"eval_steps_per_second": 0.023, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2184, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1904, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"eval_gen_len": 174.57396449704143, |
|
"eval_loss": 2.666517734527588, |
|
"eval_rouge1": 39.6006, |
|
"eval_rouge2": 14.9586, |
|
"eval_rougeL": 27.2453, |
|
"eval_rougeLsum": 37.6744, |
|
"eval_runtime": 450.9467, |
|
"eval_samples_per_second": 0.75, |
|
"eval_steps_per_second": 0.024, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2076, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2247, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"eval_gen_len": 500.58284023668637, |
|
"eval_loss": 2.722362518310547, |
|
"eval_rouge1": 30.5957, |
|
"eval_rouge2": 13.3496, |
|
"eval_rougeL": 21.9712, |
|
"eval_rougeLsum": 29.22, |
|
"eval_runtime": 477.6586, |
|
"eval_samples_per_second": 0.708, |
|
"eval_steps_per_second": 0.023, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1866, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 0.0005, |
|
"loss": 0.182, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_gen_len": 259.6568047337278, |
|
"eval_loss": 3.271503448486328, |
|
"eval_rouge1": 41.6828, |
|
"eval_rouge2": 17.0818, |
|
"eval_rougeL": 28.087, |
|
"eval_rougeLsum": 39.5947, |
|
"eval_runtime": 468.2637, |
|
"eval_samples_per_second": 0.722, |
|
"eval_steps_per_second": 0.023, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"eval_gen_len": 484.396449704142, |
|
"eval_loss": 2.397312641143799, |
|
"eval_rouge1": 31.9833, |
|
"eval_rouge2": 14.0141, |
|
"eval_rougeL": 22.6823, |
|
"eval_rougeLsum": 30.6424, |
|
"eval_runtime": 471.2439, |
|
"eval_samples_per_second": 0.717, |
|
"eval_steps_per_second": 0.023, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"step": 45, |
|
"total_flos": 2.4232217682745754e+18, |
|
"train_loss": 0.16495737830797833, |
|
"train_runtime": 12728.0808, |
|
"train_samples_per_second": 4.329, |
|
"train_steps_per_second": 0.004 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 45, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"total_flos": 2.4232217682745754e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|