barthez-deft-linguistique / trainer_state.json
José Ángel González
add model
8034f42
{
"best_metric": 1.7596173286437988,
"best_model_checkpoint": "./checkpoints/barthez-deft-linguistique/checkpoint-756",
"epoch": 20.0,
"global_step": 2160,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.93,
"learning_rate": 2.8722222222222222e-05,
"loss": 3.0569,
"step": 100
},
{
"epoch": 1.0,
"eval_gen_len": 18.3803,
"eval_loss": 2.0281970500946045,
"eval_rouge1": 31.6993,
"eval_rouge2": 14.9483,
"eval_rougeL": 25.5565,
"eval_rougeLsum": 25.4379,
"eval_runtime": 3.4728,
"eval_samples_per_second": 20.444,
"eval_steps_per_second": 5.183,
"step": 108
},
{
"epoch": 1.85,
"learning_rate": 2.7333333333333335e-05,
"loss": 2.2892,
"step": 200
},
{
"epoch": 2.0,
"eval_gen_len": 18.507,
"eval_loss": 1.8552746772766113,
"eval_rouge1": 35.2563,
"eval_rouge2": 18.019,
"eval_rougeL": 28.3135,
"eval_rougeLsum": 28.2927,
"eval_runtime": 3.4017,
"eval_samples_per_second": 20.872,
"eval_steps_per_second": 5.291,
"step": 216
},
{
"epoch": 2.78,
"learning_rate": 2.5944444444444444e-05,
"loss": 1.9062,
"step": 300
},
{
"epoch": 3.0,
"eval_gen_len": 19.5352,
"eval_loss": 1.7696171998977661,
"eval_rouge1": 37.4613,
"eval_rouge2": 18.1488,
"eval_rougeL": 28.9959,
"eval_rougeLsum": 29.0134,
"eval_runtime": 3.4439,
"eval_samples_per_second": 20.616,
"eval_steps_per_second": 5.227,
"step": 324
},
{
"epoch": 3.7,
"learning_rate": 2.4555555555555557e-05,
"loss": 1.716,
"step": 400
},
{
"epoch": 4.0,
"eval_gen_len": 18.9577,
"eval_loss": 1.7640784978866577,
"eval_rouge1": 37.6903,
"eval_rouge2": 18.7496,
"eval_rougeL": 30.1097,
"eval_rougeLsum": 30.1027,
"eval_runtime": 3.4032,
"eval_samples_per_second": 20.863,
"eval_steps_per_second": 5.289,
"step": 432
},
{
"epoch": 4.63,
"learning_rate": 2.316666666666667e-05,
"loss": 1.5722,
"step": 500
},
{
"epoch": 5.0,
"eval_gen_len": 19.169,
"eval_loss": 1.7780805826187134,
"eval_rouge1": 38.1013,
"eval_rouge2": 19.8291,
"eval_rougeL": 29.8142,
"eval_rougeLsum": 29.802,
"eval_runtime": 3.5765,
"eval_samples_per_second": 19.852,
"eval_steps_per_second": 5.033,
"step": 540
},
{
"epoch": 5.56,
"learning_rate": 2.177777777777778e-05,
"loss": 1.4655,
"step": 600
},
{
"epoch": 6.0,
"eval_gen_len": 19.3662,
"eval_loss": 1.7661303281784058,
"eval_rouge1": 38.3557,
"eval_rouge2": 20.3309,
"eval_rougeL": 30.5068,
"eval_rougeLsum": 30.4728,
"eval_runtime": 3.4442,
"eval_samples_per_second": 20.614,
"eval_steps_per_second": 5.226,
"step": 648
},
{
"epoch": 6.48,
"learning_rate": 2.038888888888889e-05,
"loss": 1.3507,
"step": 700
},
{
"epoch": 7.0,
"eval_gen_len": 19.3944,
"eval_loss": 1.7596173286437988,
"eval_rouge1": 39.7409,
"eval_rouge2": 20.2998,
"eval_rougeL": 31.0849,
"eval_rougeLsum": 31.1152,
"eval_runtime": 3.408,
"eval_samples_per_second": 20.833,
"eval_steps_per_second": 5.282,
"step": 756
},
{
"epoch": 7.41,
"learning_rate": 1.9e-05,
"loss": 1.2874,
"step": 800
},
{
"epoch": 8.0,
"eval_gen_len": 19.4789,
"eval_loss": 1.77056884765625,
"eval_rouge1": 37.7846,
"eval_rouge2": 20.3457,
"eval_rougeL": 30.6826,
"eval_rougeLsum": 30.6321,
"eval_runtime": 3.4054,
"eval_samples_per_second": 20.849,
"eval_steps_per_second": 5.286,
"step": 864
},
{
"epoch": 8.33,
"learning_rate": 1.7652777777777777e-05,
"loss": 1.2641,
"step": 900
},
{
"epoch": 9.0,
"eval_gen_len": 19.3944,
"eval_loss": 1.784759521484375,
"eval_rouge1": 38.7421,
"eval_rouge2": 19.5701,
"eval_rougeL": 30.5798,
"eval_rougeLsum": 30.6305,
"eval_runtime": 3.4074,
"eval_samples_per_second": 20.837,
"eval_steps_per_second": 5.283,
"step": 972
},
{
"epoch": 9.26,
"learning_rate": 1.626388888888889e-05,
"loss": 1.1192,
"step": 1000
},
{
"epoch": 10.0,
"eval_gen_len": 19.5493,
"eval_loss": 1.8008346557617188,
"eval_rouge1": 40.3313,
"eval_rouge2": 20.3378,
"eval_rougeL": 31.8325,
"eval_rougeLsum": 31.8648,
"eval_runtime": 3.4325,
"eval_samples_per_second": 20.684,
"eval_steps_per_second": 5.244,
"step": 1080
},
{
"epoch": 10.19,
"learning_rate": 1.4875e-05,
"loss": 1.0724,
"step": 1100
},
{
"epoch": 11.0,
"eval_gen_len": 19.8592,
"eval_loss": 1.8450435400009155,
"eval_rouge1": 38.9612,
"eval_rouge2": 20.5719,
"eval_rougeL": 31.4496,
"eval_rougeLsum": 31.3144,
"eval_runtime": 3.4687,
"eval_samples_per_second": 20.469,
"eval_steps_per_second": 5.189,
"step": 1188
},
{
"epoch": 11.11,
"learning_rate": 1.348611111111111e-05,
"loss": 1.0077,
"step": 1200
},
{
"epoch": 12.0,
"eval_gen_len": 19.7324,
"eval_loss": 1.8364313840866089,
"eval_rouge1": 36.5997,
"eval_rouge2": 18.46,
"eval_rougeL": 29.1808,
"eval_rougeLsum": 29.1705,
"eval_runtime": 3.4932,
"eval_samples_per_second": 20.325,
"eval_steps_per_second": 5.153,
"step": 1296
},
{
"epoch": 12.04,
"learning_rate": 1.2097222222222223e-05,
"loss": 0.9743,
"step": 1300
},
{
"epoch": 12.96,
"learning_rate": 1.0708333333333334e-05,
"loss": 0.9362,
"step": 1400
},
{
"epoch": 13.0,
"eval_gen_len": 19.6338,
"eval_loss": 1.867732286453247,
"eval_rouge1": 38.0371,
"eval_rouge2": 19.2321,
"eval_rougeL": 30.3893,
"eval_rougeLsum": 30.3926,
"eval_runtime": 3.4487,
"eval_samples_per_second": 20.588,
"eval_steps_per_second": 5.219,
"step": 1404
},
{
"epoch": 13.89,
"learning_rate": 9.319444444444445e-06,
"loss": 0.8868,
"step": 1500
},
{
"epoch": 14.0,
"eval_gen_len": 19.6479,
"eval_loss": 1.9153633117675781,
"eval_rouge1": 36.4737,
"eval_rouge2": 18.5314,
"eval_rougeL": 29.325,
"eval_rougeLsum": 29.3634,
"eval_runtime": 3.4736,
"eval_samples_per_second": 20.44,
"eval_steps_per_second": 5.182,
"step": 1512
},
{
"epoch": 14.81,
"learning_rate": 7.930555555555556e-06,
"loss": 0.8335,
"step": 1600
},
{
"epoch": 15.0,
"eval_gen_len": 19.8028,
"eval_loss": 1.93436598777771,
"eval_rouge1": 35.7583,
"eval_rouge2": 18.0687,
"eval_rougeL": 27.9666,
"eval_rougeLsum": 27.8675,
"eval_runtime": 3.3929,
"eval_samples_per_second": 20.926,
"eval_steps_per_second": 5.305,
"step": 1620
},
{
"epoch": 15.74,
"learning_rate": 6.541666666666667e-06,
"loss": 0.8305,
"step": 1700
},
{
"epoch": 16.0,
"eval_gen_len": 19.9577,
"eval_loss": 1.9556257724761963,
"eval_rouge1": 37.2137,
"eval_rouge2": 18.2199,
"eval_rougeL": 29.5959,
"eval_rougeLsum": 29.5799,
"eval_runtime": 3.4517,
"eval_samples_per_second": 20.57,
"eval_steps_per_second": 5.215,
"step": 1728
},
{
"epoch": 16.67,
"learning_rate": 5.152777777777778e-06,
"loss": 0.8057,
"step": 1800
},
{
"epoch": 17.0,
"eval_gen_len": 19.7324,
"eval_loss": 1.9793369770050049,
"eval_rouge1": 36.6834,
"eval_rouge2": 17.8505,
"eval_rougeL": 28.6701,
"eval_rougeLsum": 28.7145,
"eval_runtime": 3.4482,
"eval_samples_per_second": 20.59,
"eval_steps_per_second": 5.22,
"step": 1836
},
{
"epoch": 17.59,
"learning_rate": 3.763888888888889e-06,
"loss": 0.7869,
"step": 1900
},
{
"epoch": 18.0,
"eval_gen_len": 19.7606,
"eval_loss": 1.9994447231292725,
"eval_rouge1": 37.5918,
"eval_rouge2": 19.1984,
"eval_rougeL": 28.8569,
"eval_rougeLsum": 28.8278,
"eval_runtime": 3.4143,
"eval_samples_per_second": 20.795,
"eval_steps_per_second": 5.272,
"step": 1944
},
{
"epoch": 18.52,
"learning_rate": 2.375e-06,
"loss": 0.7549,
"step": 2000
},
{
"epoch": 19.0,
"eval_gen_len": 19.8028,
"eval_loss": 2.011744737625122,
"eval_rouge1": 37.3278,
"eval_rouge2": 18.5169,
"eval_rougeL": 28.778,
"eval_rougeLsum": 28.7737,
"eval_runtime": 3.4309,
"eval_samples_per_second": 20.694,
"eval_steps_per_second": 5.246,
"step": 2052
},
{
"epoch": 19.44,
"learning_rate": 9.861111111111112e-07,
"loss": 0.7497,
"step": 2100
},
{
"epoch": 20.0,
"eval_gen_len": 19.6901,
"eval_loss": 2.018871784210205,
"eval_rouge1": 37.7513,
"eval_rouge2": 19.1813,
"eval_rougeL": 29.3675,
"eval_rougeLsum": 29.402,
"eval_runtime": 3.4324,
"eval_samples_per_second": 20.685,
"eval_steps_per_second": 5.244,
"step": 2160
},
{
"epoch": 20.0,
"step": 2160,
"total_flos": 1555682356666368.0,
"train_loss": 1.2554297270598236,
"train_runtime": 336.8512,
"train_samples_per_second": 25.471,
"train_steps_per_second": 6.412
}
],
"max_steps": 2160,
"num_train_epochs": 20,
"total_flos": 1555682356666368.0,
"trial_name": null,
"trial_params": null
}