{
  "best_metric": 2.0733225345611572,
  "best_model_checkpoint": "./checkpoints/barthez-deft-archeologie/checkpoint-1080",
  "epoch": 20.0,
  "global_step": 2160,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.93,
      "learning_rate": 2.8694444444444447e-05,
      "loss": 3.4832,
      "step": 100
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 15.8333,
      "eval_loss": 2.4237029552459717,
      "eval_rouge1": 22.6662,
      "eval_rouge2": 10.009,
      "eval_rougeL": 19.8729,
      "eval_rougeLsum": 19.8814,
      "eval_runtime": 3.8237,
      "eval_samples_per_second": 18.83,
      "eval_steps_per_second": 4.707,
      "step": 108
    },
    {
      "epoch": 1.85,
      "learning_rate": 2.7305555555555556e-05,
      "loss": 2.557,
      "step": 200
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 19.0139,
      "eval_loss": 2.232834577560425,
      "eval_rouge1": 24.8102,
      "eval_rouge2": 11.9911,
      "eval_rougeL": 20.4773,
      "eval_rougeLsum": 20.696,
      "eval_runtime": 3.7488,
      "eval_samples_per_second": 19.206,
      "eval_steps_per_second": 4.802,
      "step": 216
    },
    {
      "epoch": 2.78,
      "learning_rate": 2.591666666666667e-05,
      "loss": 2.2702,
      "step": 300
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 18.1944,
      "eval_loss": 2.2001593112945557,
      "eval_rouge1": 25.6482,
      "eval_rouge2": 11.6191,
      "eval_rougeL": 21.8383,
      "eval_rougeLsum": 21.9341,
      "eval_runtime": 3.6323,
      "eval_samples_per_second": 19.822,
      "eval_steps_per_second": 4.956,
      "step": 324
    },
    {
      "epoch": 3.7,
      "learning_rate": 2.4527777777777778e-05,
      "loss": 2.1119,
      "step": 400
    },
    {
      "epoch": 4.0,
      "eval_gen_len": 19.4306,
      "eval_loss": 2.126582145690918,
      "eval_rouge1": 25.5806,
      "eval_rouge2": 11.9765,
      "eval_rougeL": 21.3973,
      "eval_rougeLsum": 21.3503,
      "eval_runtime": 3.6876,
      "eval_samples_per_second": 19.525,
      "eval_steps_per_second": 4.881,
      "step": 432
    },
    {
      "epoch": 4.63,
      "learning_rate": 2.3138888888888887e-05,
      "loss": 1.9582,
      "step": 500
    },
    {
      "epoch": 5.0,
      "eval_gen_len": 19.1528,
      "eval_loss": 2.107192039489746,
      "eval_rouge1": 25.6578,
      "eval_rouge2": 12.2709,
      "eval_rougeL": 22.182,
      "eval_rougeLsum": 22.0548,
      "eval_runtime": 3.788,
      "eval_samples_per_second": 19.007,
      "eval_steps_per_second": 4.752,
      "step": 540
    },
    {
      "epoch": 5.56,
      "learning_rate": 2.175e-05,
      "loss": 1.8137,
      "step": 600
    },
    {
      "epoch": 6.0,
      "eval_gen_len": 19.4722,
      "eval_loss": 2.100757122039795,
      "eval_rouge1": 26.5272,
      "eval_rouge2": 11.4033,
      "eval_rougeL": 22.359,
      "eval_rougeLsum": 22.3259,
      "eval_runtime": 3.6704,
      "eval_samples_per_second": 19.617,
      "eval_steps_per_second": 4.904,
      "step": 648
    },
    {
      "epoch": 6.48,
      "learning_rate": 2.0361111111111113e-05,
      "loss": 1.7725,
      "step": 700
    },
    {
      "epoch": 7.0,
      "eval_gen_len": 19.1806,
      "eval_loss": 2.107440948486328,
      "eval_rouge1": 25.0405,
      "eval_rouge2": 11.1773,
      "eval_rougeL": 21.1369,
      "eval_rougeLsum": 21.1847,
      "eval_runtime": 3.6936,
      "eval_samples_per_second": 19.493,
      "eval_steps_per_second": 4.873,
      "step": 756
    },
    {
      "epoch": 7.41,
      "learning_rate": 1.8972222222222222e-05,
      "loss": 1.6772,
      "step": 800
    },
    {
      "epoch": 8.0,
      "eval_gen_len": 19.3333,
      "eval_loss": 2.0958752632141113,
      "eval_rouge1": 26.5237,
      "eval_rouge2": 11.6028,
      "eval_rougeL": 22.5018,
      "eval_rougeLsum": 22.3931,
      "eval_runtime": 3.6558,
      "eval_samples_per_second": 19.695,
      "eval_steps_per_second": 4.924,
      "step": 864
    },
    {
      "epoch": 8.33,
      "learning_rate": 1.7583333333333335e-05,
      "loss": 1.5798,
      "step": 900
    },
    {
      "epoch": 9.0,
      "eval_gen_len": 19.7222,
      "eval_loss": 2.097611904144287,
      "eval_rouge1": 27.7443,
      "eval_rouge2": 11.9898,
      "eval_rougeL": 22.4052,
      "eval_rougeLsum": 22.2954,
      "eval_runtime": 3.7405,
      "eval_samples_per_second": 19.249,
      "eval_steps_per_second": 4.812,
      "step": 972
    },
    {
      "epoch": 9.26,
      "learning_rate": 1.6194444444444448e-05,
      "loss": 1.4753,
      "step": 1000
    },
    {
      "epoch": 10.0,
      "eval_gen_len": 19.8194,
      "eval_loss": 2.0733225345611572,
      "eval_rouge1": 28.3502,
      "eval_rouge2": 12.9162,
      "eval_rougeL": 22.6352,
      "eval_rougeLsum": 22.6015,
      "eval_runtime": 3.6963,
      "eval_samples_per_second": 19.479,
      "eval_steps_per_second": 4.87,
      "step": 1080
    },
    {
      "epoch": 10.19,
      "learning_rate": 1.4819444444444445e-05,
      "loss": 1.4646,
      "step": 1100
    },
    {
      "epoch": 11.0,
      "eval_gen_len": 19.6111,
      "eval_loss": 2.109070062637329,
      "eval_rouge1": 27.9198,
      "eval_rouge2": 12.8591,
      "eval_rougeL": 23.0718,
      "eval_rougeLsum": 23.0779,
      "eval_runtime": 3.7289,
      "eval_samples_per_second": 19.308,
      "eval_steps_per_second": 4.827,
      "step": 1188
    },
    {
      "epoch": 11.11,
      "learning_rate": 1.3430555555555556e-05,
      "loss": 1.4082,
      "step": 1200
    },
    {
      "epoch": 12.0,
      "eval_gen_len": 19.4861,
      "eval_loss": 2.103557586669922,
      "eval_rouge1": 28.8509,
      "eval_rouge2": 13.0987,
      "eval_rougeL": 23.4189,
      "eval_rougeLsum": 23.5044,
      "eval_runtime": 3.6418,
      "eval_samples_per_second": 19.771,
      "eval_steps_per_second": 4.943,
      "step": 1296
    },
    {
      "epoch": 12.04,
      "learning_rate": 1.2041666666666667e-05,
      "loss": 1.3951,
      "step": 1300
    },
    {
      "epoch": 12.96,
      "learning_rate": 1.0652777777777778e-05,
      "loss": 1.2862,
      "step": 1400
    },
    {
      "epoch": 13.0,
      "eval_gen_len": 19.8611,
      "eval_loss": 2.122204065322876,
      "eval_rouge1": 28.6641,
      "eval_rouge2": 12.8157,
      "eval_rougeL": 22.6799,
      "eval_rougeLsum": 22.7051,
      "eval_runtime": 3.6343,
      "eval_samples_per_second": 19.811,
      "eval_steps_per_second": 4.953,
      "step": 1404
    },
    {
      "epoch": 13.89,
      "learning_rate": 9.263888888888889e-06,
      "loss": 1.2612,
      "step": 1500
    },
    {
      "epoch": 14.0,
      "eval_gen_len": 19.875,
      "eval_loss": 2.14872407913208,
      "eval_rouge1": 26.9709,
      "eval_rouge2": 11.6084,
      "eval_rougeL": 22.0312,
      "eval_rougeLsum": 22.0543,
      "eval_runtime": 3.7019,
      "eval_samples_per_second": 19.45,
      "eval_steps_per_second": 4.862,
      "step": 1512
    },
    {
      "epoch": 14.81,
      "learning_rate": 7.875e-06,
      "loss": 1.2327,
      "step": 1600
    },
    {
      "epoch": 15.0,
      "eval_gen_len": 19.7361,
      "eval_loss": 2.180756092071533,
      "eval_rouge1": 28.218,
      "eval_rouge2": 12.6239,
      "eval_rougeL": 22.7372,
      "eval_rougeLsum": 22.7881,
      "eval_runtime": 3.6942,
      "eval_samples_per_second": 19.49,
      "eval_steps_per_second": 4.872,
      "step": 1620
    },
    {
      "epoch": 15.74,
      "learning_rate": 6.486111111111111e-06,
      "loss": 1.2264,
      "step": 1700
    },
    {
      "epoch": 16.0,
      "eval_gen_len": 19.7639,
      "eval_loss": 2.177760124206543,
      "eval_rouge1": 26.7393,
      "eval_rouge2": 11.4474,
      "eval_rougeL": 21.6057,
      "eval_rougeLsum": 21.555,
      "eval_runtime": 3.7636,
      "eval_samples_per_second": 19.131,
      "eval_steps_per_second": 4.783,
      "step": 1728
    },
    {
      "epoch": 16.67,
      "learning_rate": 5.097222222222222e-06,
      "loss": 1.1848,
      "step": 1800
    },
    {
      "epoch": 17.0,
      "eval_gen_len": 19.6806,
      "eval_loss": 2.1995341777801514,
      "eval_rouge1": 27.6902,
      "eval_rouge2": 12.1082,
      "eval_rougeL": 22.0406,
      "eval_rougeLsum": 22.0101,
      "eval_runtime": 3.6502,
      "eval_samples_per_second": 19.725,
      "eval_steps_per_second": 4.931,
      "step": 1836
    },
    {
      "epoch": 17.59,
      "learning_rate": 3.7083333333333335e-06,
      "loss": 1.133,
      "step": 1900
    },
    {
      "epoch": 18.0,
      "eval_gen_len": 19.8056,
      "eval_loss": 2.2037861347198486,
      "eval_rouge1": 27.0402,
      "eval_rouge2": 12.1846,
      "eval_rougeL": 21.7793,
      "eval_rougeLsum": 21.7513,
      "eval_runtime": 3.6766,
      "eval_samples_per_second": 19.583,
      "eval_steps_per_second": 4.896,
      "step": 1944
    },
    {
      "epoch": 18.52,
      "learning_rate": 2.319444444444444e-06,
      "loss": 1.168,
      "step": 2000
    },
    {
      "epoch": 19.0,
      "eval_gen_len": 19.7222,
      "eval_loss": 2.2116100788116455,
      "eval_rouge1": 27.5149,
      "eval_rouge2": 11.9876,
      "eval_rougeL": 22.1113,
      "eval_rougeLsum": 22.1527,
      "eval_runtime": 3.6273,
      "eval_samples_per_second": 19.849,
      "eval_steps_per_second": 4.962,
      "step": 2052
    },
    {
      "epoch": 19.44,
      "learning_rate": 9.305555555555555e-07,
      "loss": 1.1206,
      "step": 2100
    },
    {
      "epoch": 20.0,
      "eval_gen_len": 19.5972,
      "eval_loss": 2.213322639465332,
      "eval_rouge1": 28.2321,
      "eval_rouge2": 12.677,
      "eval_rougeL": 22.749,
      "eval_rougeLsum": 22.8485,
      "eval_runtime": 3.6293,
      "eval_samples_per_second": 19.839,
      "eval_steps_per_second": 4.96,
      "step": 2160
    },
    {
      "epoch": 20.0,
      "step": 2160,
      "total_flos": 2429194384871424.0,
      "train_loss": 1.6326574502167879,
      "train_runtime": 371.3395,
      "train_samples_per_second": 23.213,
      "train_steps_per_second": 5.817
    }
  ],
  "max_steps": 2160,
  "num_train_epochs": 20,
  "total_flos": 2429194384871424.0,
  "trial_name": null,
  "trial_params": null
}