barthez-deft-archeologie / trainer_state.json
José Ángel González
add model
7c2f8a8
{
"best_metric": 2.0733225345611572,
"best_model_checkpoint": "./checkpoints/barthez-deft-archeologie/checkpoint-1080",
"epoch": 20.0,
"global_step": 2160,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.93,
"learning_rate": 2.8694444444444447e-05,
"loss": 3.4832,
"step": 100
},
{
"epoch": 1.0,
"eval_gen_len": 15.8333,
"eval_loss": 2.4237029552459717,
"eval_rouge1": 22.6662,
"eval_rouge2": 10.009,
"eval_rougeL": 19.8729,
"eval_rougeLsum": 19.8814,
"eval_runtime": 3.8237,
"eval_samples_per_second": 18.83,
"eval_steps_per_second": 4.707,
"step": 108
},
{
"epoch": 1.85,
"learning_rate": 2.7305555555555556e-05,
"loss": 2.557,
"step": 200
},
{
"epoch": 2.0,
"eval_gen_len": 19.0139,
"eval_loss": 2.232834577560425,
"eval_rouge1": 24.8102,
"eval_rouge2": 11.9911,
"eval_rougeL": 20.4773,
"eval_rougeLsum": 20.696,
"eval_runtime": 3.7488,
"eval_samples_per_second": 19.206,
"eval_steps_per_second": 4.802,
"step": 216
},
{
"epoch": 2.78,
"learning_rate": 2.591666666666667e-05,
"loss": 2.2702,
"step": 300
},
{
"epoch": 3.0,
"eval_gen_len": 18.1944,
"eval_loss": 2.2001593112945557,
"eval_rouge1": 25.6482,
"eval_rouge2": 11.6191,
"eval_rougeL": 21.8383,
"eval_rougeLsum": 21.9341,
"eval_runtime": 3.6323,
"eval_samples_per_second": 19.822,
"eval_steps_per_second": 4.956,
"step": 324
},
{
"epoch": 3.7,
"learning_rate": 2.4527777777777778e-05,
"loss": 2.1119,
"step": 400
},
{
"epoch": 4.0,
"eval_gen_len": 19.4306,
"eval_loss": 2.126582145690918,
"eval_rouge1": 25.5806,
"eval_rouge2": 11.9765,
"eval_rougeL": 21.3973,
"eval_rougeLsum": 21.3503,
"eval_runtime": 3.6876,
"eval_samples_per_second": 19.525,
"eval_steps_per_second": 4.881,
"step": 432
},
{
"epoch": 4.63,
"learning_rate": 2.3138888888888887e-05,
"loss": 1.9582,
"step": 500
},
{
"epoch": 5.0,
"eval_gen_len": 19.1528,
"eval_loss": 2.107192039489746,
"eval_rouge1": 25.6578,
"eval_rouge2": 12.2709,
"eval_rougeL": 22.182,
"eval_rougeLsum": 22.0548,
"eval_runtime": 3.788,
"eval_samples_per_second": 19.007,
"eval_steps_per_second": 4.752,
"step": 540
},
{
"epoch": 5.56,
"learning_rate": 2.175e-05,
"loss": 1.8137,
"step": 600
},
{
"epoch": 6.0,
"eval_gen_len": 19.4722,
"eval_loss": 2.100757122039795,
"eval_rouge1": 26.5272,
"eval_rouge2": 11.4033,
"eval_rougeL": 22.359,
"eval_rougeLsum": 22.3259,
"eval_runtime": 3.6704,
"eval_samples_per_second": 19.617,
"eval_steps_per_second": 4.904,
"step": 648
},
{
"epoch": 6.48,
"learning_rate": 2.0361111111111113e-05,
"loss": 1.7725,
"step": 700
},
{
"epoch": 7.0,
"eval_gen_len": 19.1806,
"eval_loss": 2.107440948486328,
"eval_rouge1": 25.0405,
"eval_rouge2": 11.1773,
"eval_rougeL": 21.1369,
"eval_rougeLsum": 21.1847,
"eval_runtime": 3.6936,
"eval_samples_per_second": 19.493,
"eval_steps_per_second": 4.873,
"step": 756
},
{
"epoch": 7.41,
"learning_rate": 1.8972222222222222e-05,
"loss": 1.6772,
"step": 800
},
{
"epoch": 8.0,
"eval_gen_len": 19.3333,
"eval_loss": 2.0958752632141113,
"eval_rouge1": 26.5237,
"eval_rouge2": 11.6028,
"eval_rougeL": 22.5018,
"eval_rougeLsum": 22.3931,
"eval_runtime": 3.6558,
"eval_samples_per_second": 19.695,
"eval_steps_per_second": 4.924,
"step": 864
},
{
"epoch": 8.33,
"learning_rate": 1.7583333333333335e-05,
"loss": 1.5798,
"step": 900
},
{
"epoch": 9.0,
"eval_gen_len": 19.7222,
"eval_loss": 2.097611904144287,
"eval_rouge1": 27.7443,
"eval_rouge2": 11.9898,
"eval_rougeL": 22.4052,
"eval_rougeLsum": 22.2954,
"eval_runtime": 3.7405,
"eval_samples_per_second": 19.249,
"eval_steps_per_second": 4.812,
"step": 972
},
{
"epoch": 9.26,
"learning_rate": 1.6194444444444448e-05,
"loss": 1.4753,
"step": 1000
},
{
"epoch": 10.0,
"eval_gen_len": 19.8194,
"eval_loss": 2.0733225345611572,
"eval_rouge1": 28.3502,
"eval_rouge2": 12.9162,
"eval_rougeL": 22.6352,
"eval_rougeLsum": 22.6015,
"eval_runtime": 3.6963,
"eval_samples_per_second": 19.479,
"eval_steps_per_second": 4.87,
"step": 1080
},
{
"epoch": 10.19,
"learning_rate": 1.4819444444444445e-05,
"loss": 1.4646,
"step": 1100
},
{
"epoch": 11.0,
"eval_gen_len": 19.6111,
"eval_loss": 2.109070062637329,
"eval_rouge1": 27.9198,
"eval_rouge2": 12.8591,
"eval_rougeL": 23.0718,
"eval_rougeLsum": 23.0779,
"eval_runtime": 3.7289,
"eval_samples_per_second": 19.308,
"eval_steps_per_second": 4.827,
"step": 1188
},
{
"epoch": 11.11,
"learning_rate": 1.3430555555555556e-05,
"loss": 1.4082,
"step": 1200
},
{
"epoch": 12.0,
"eval_gen_len": 19.4861,
"eval_loss": 2.103557586669922,
"eval_rouge1": 28.8509,
"eval_rouge2": 13.0987,
"eval_rougeL": 23.4189,
"eval_rougeLsum": 23.5044,
"eval_runtime": 3.6418,
"eval_samples_per_second": 19.771,
"eval_steps_per_second": 4.943,
"step": 1296
},
{
"epoch": 12.04,
"learning_rate": 1.2041666666666667e-05,
"loss": 1.3951,
"step": 1300
},
{
"epoch": 12.96,
"learning_rate": 1.0652777777777778e-05,
"loss": 1.2862,
"step": 1400
},
{
"epoch": 13.0,
"eval_gen_len": 19.8611,
"eval_loss": 2.122204065322876,
"eval_rouge1": 28.6641,
"eval_rouge2": 12.8157,
"eval_rougeL": 22.6799,
"eval_rougeLsum": 22.7051,
"eval_runtime": 3.6343,
"eval_samples_per_second": 19.811,
"eval_steps_per_second": 4.953,
"step": 1404
},
{
"epoch": 13.89,
"learning_rate": 9.263888888888889e-06,
"loss": 1.2612,
"step": 1500
},
{
"epoch": 14.0,
"eval_gen_len": 19.875,
"eval_loss": 2.14872407913208,
"eval_rouge1": 26.9709,
"eval_rouge2": 11.6084,
"eval_rougeL": 22.0312,
"eval_rougeLsum": 22.0543,
"eval_runtime": 3.7019,
"eval_samples_per_second": 19.45,
"eval_steps_per_second": 4.862,
"step": 1512
},
{
"epoch": 14.81,
"learning_rate": 7.875e-06,
"loss": 1.2327,
"step": 1600
},
{
"epoch": 15.0,
"eval_gen_len": 19.7361,
"eval_loss": 2.180756092071533,
"eval_rouge1": 28.218,
"eval_rouge2": 12.6239,
"eval_rougeL": 22.7372,
"eval_rougeLsum": 22.7881,
"eval_runtime": 3.6942,
"eval_samples_per_second": 19.49,
"eval_steps_per_second": 4.872,
"step": 1620
},
{
"epoch": 15.74,
"learning_rate": 6.486111111111111e-06,
"loss": 1.2264,
"step": 1700
},
{
"epoch": 16.0,
"eval_gen_len": 19.7639,
"eval_loss": 2.177760124206543,
"eval_rouge1": 26.7393,
"eval_rouge2": 11.4474,
"eval_rougeL": 21.6057,
"eval_rougeLsum": 21.555,
"eval_runtime": 3.7636,
"eval_samples_per_second": 19.131,
"eval_steps_per_second": 4.783,
"step": 1728
},
{
"epoch": 16.67,
"learning_rate": 5.097222222222222e-06,
"loss": 1.1848,
"step": 1800
},
{
"epoch": 17.0,
"eval_gen_len": 19.6806,
"eval_loss": 2.1995341777801514,
"eval_rouge1": 27.6902,
"eval_rouge2": 12.1082,
"eval_rougeL": 22.0406,
"eval_rougeLsum": 22.0101,
"eval_runtime": 3.6502,
"eval_samples_per_second": 19.725,
"eval_steps_per_second": 4.931,
"step": 1836
},
{
"epoch": 17.59,
"learning_rate": 3.7083333333333335e-06,
"loss": 1.133,
"step": 1900
},
{
"epoch": 18.0,
"eval_gen_len": 19.8056,
"eval_loss": 2.2037861347198486,
"eval_rouge1": 27.0402,
"eval_rouge2": 12.1846,
"eval_rougeL": 21.7793,
"eval_rougeLsum": 21.7513,
"eval_runtime": 3.6766,
"eval_samples_per_second": 19.583,
"eval_steps_per_second": 4.896,
"step": 1944
},
{
"epoch": 18.52,
"learning_rate": 2.319444444444444e-06,
"loss": 1.168,
"step": 2000
},
{
"epoch": 19.0,
"eval_gen_len": 19.7222,
"eval_loss": 2.2116100788116455,
"eval_rouge1": 27.5149,
"eval_rouge2": 11.9876,
"eval_rougeL": 22.1113,
"eval_rougeLsum": 22.1527,
"eval_runtime": 3.6273,
"eval_samples_per_second": 19.849,
"eval_steps_per_second": 4.962,
"step": 2052
},
{
"epoch": 19.44,
"learning_rate": 9.305555555555555e-07,
"loss": 1.1206,
"step": 2100
},
{
"epoch": 20.0,
"eval_gen_len": 19.5972,
"eval_loss": 2.213322639465332,
"eval_rouge1": 28.2321,
"eval_rouge2": 12.677,
"eval_rougeL": 22.749,
"eval_rougeLsum": 22.8485,
"eval_runtime": 3.6293,
"eval_samples_per_second": 19.839,
"eval_steps_per_second": 4.96,
"step": 2160
},
{
"epoch": 20.0,
"step": 2160,
"total_flos": 2429194384871424.0,
"train_loss": 1.6326574502167879,
"train_runtime": 371.3395,
"train_samples_per_second": 23.213,
"train_steps_per_second": 5.817
}
],
"max_steps": 2160,
"num_train_epochs": 20,
"total_flos": 2429194384871424.0,
"trial_name": null,
"trial_params": null
}