|
{ |
|
"best_metric": 1.7596173286437988, |
|
"best_model_checkpoint": "./checkpoints/barthez-deft-linguistique/checkpoint-756", |
|
"epoch": 20.0, |
|
"global_step": 2160, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.8722222222222222e-05, |
|
"loss": 3.0569, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 18.3803, |
|
"eval_loss": 2.0281970500946045, |
|
"eval_rouge1": 31.6993, |
|
"eval_rouge2": 14.9483, |
|
"eval_rougeL": 25.5565, |
|
"eval_rougeLsum": 25.4379, |
|
"eval_runtime": 3.4728, |
|
"eval_samples_per_second": 20.444, |
|
"eval_steps_per_second": 5.183, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.7333333333333335e-05, |
|
"loss": 2.2892, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 18.507, |
|
"eval_loss": 1.8552746772766113, |
|
"eval_rouge1": 35.2563, |
|
"eval_rouge2": 18.019, |
|
"eval_rougeL": 28.3135, |
|
"eval_rougeLsum": 28.2927, |
|
"eval_runtime": 3.4017, |
|
"eval_samples_per_second": 20.872, |
|
"eval_steps_per_second": 5.291, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.5944444444444444e-05, |
|
"loss": 1.9062, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 19.5352, |
|
"eval_loss": 1.7696171998977661, |
|
"eval_rouge1": 37.4613, |
|
"eval_rouge2": 18.1488, |
|
"eval_rougeL": 28.9959, |
|
"eval_rougeLsum": 29.0134, |
|
"eval_runtime": 3.4439, |
|
"eval_samples_per_second": 20.616, |
|
"eval_steps_per_second": 5.227, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 2.4555555555555557e-05, |
|
"loss": 1.716, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 18.9577, |
|
"eval_loss": 1.7640784978866577, |
|
"eval_rouge1": 37.6903, |
|
"eval_rouge2": 18.7496, |
|
"eval_rougeL": 30.1097, |
|
"eval_rougeLsum": 30.1027, |
|
"eval_runtime": 3.4032, |
|
"eval_samples_per_second": 20.863, |
|
"eval_steps_per_second": 5.289, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 2.316666666666667e-05, |
|
"loss": 1.5722, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 19.169, |
|
"eval_loss": 1.7780805826187134, |
|
"eval_rouge1": 38.1013, |
|
"eval_rouge2": 19.8291, |
|
"eval_rougeL": 29.8142, |
|
"eval_rougeLsum": 29.802, |
|
"eval_runtime": 3.5765, |
|
"eval_samples_per_second": 19.852, |
|
"eval_steps_per_second": 5.033, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 2.177777777777778e-05, |
|
"loss": 1.4655, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 19.3662, |
|
"eval_loss": 1.7661303281784058, |
|
"eval_rouge1": 38.3557, |
|
"eval_rouge2": 20.3309, |
|
"eval_rougeL": 30.5068, |
|
"eval_rougeLsum": 30.4728, |
|
"eval_runtime": 3.4442, |
|
"eval_samples_per_second": 20.614, |
|
"eval_steps_per_second": 5.226, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 2.038888888888889e-05, |
|
"loss": 1.3507, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_gen_len": 19.3944, |
|
"eval_loss": 1.7596173286437988, |
|
"eval_rouge1": 39.7409, |
|
"eval_rouge2": 20.2998, |
|
"eval_rougeL": 31.0849, |
|
"eval_rougeLsum": 31.1152, |
|
"eval_runtime": 3.408, |
|
"eval_samples_per_second": 20.833, |
|
"eval_steps_per_second": 5.282, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 1.9e-05, |
|
"loss": 1.2874, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 19.4789, |
|
"eval_loss": 1.77056884765625, |
|
"eval_rouge1": 37.7846, |
|
"eval_rouge2": 20.3457, |
|
"eval_rougeL": 30.6826, |
|
"eval_rougeLsum": 30.6321, |
|
"eval_runtime": 3.4054, |
|
"eval_samples_per_second": 20.849, |
|
"eval_steps_per_second": 5.286, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 1.7652777777777777e-05, |
|
"loss": 1.2641, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_gen_len": 19.3944, |
|
"eval_loss": 1.784759521484375, |
|
"eval_rouge1": 38.7421, |
|
"eval_rouge2": 19.5701, |
|
"eval_rougeL": 30.5798, |
|
"eval_rougeLsum": 30.6305, |
|
"eval_runtime": 3.4074, |
|
"eval_samples_per_second": 20.837, |
|
"eval_steps_per_second": 5.283, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 1.626388888888889e-05, |
|
"loss": 1.1192, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_gen_len": 19.5493, |
|
"eval_loss": 1.8008346557617188, |
|
"eval_rouge1": 40.3313, |
|
"eval_rouge2": 20.3378, |
|
"eval_rougeL": 31.8325, |
|
"eval_rougeLsum": 31.8648, |
|
"eval_runtime": 3.4325, |
|
"eval_samples_per_second": 20.684, |
|
"eval_steps_per_second": 5.244, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"learning_rate": 1.4875e-05, |
|
"loss": 1.0724, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_gen_len": 19.8592, |
|
"eval_loss": 1.8450435400009155, |
|
"eval_rouge1": 38.9612, |
|
"eval_rouge2": 20.5719, |
|
"eval_rougeL": 31.4496, |
|
"eval_rougeLsum": 31.3144, |
|
"eval_runtime": 3.4687, |
|
"eval_samples_per_second": 20.469, |
|
"eval_steps_per_second": 5.189, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 1.348611111111111e-05, |
|
"loss": 1.0077, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_gen_len": 19.7324, |
|
"eval_loss": 1.8364313840866089, |
|
"eval_rouge1": 36.5997, |
|
"eval_rouge2": 18.46, |
|
"eval_rougeL": 29.1808, |
|
"eval_rougeLsum": 29.1705, |
|
"eval_runtime": 3.4932, |
|
"eval_samples_per_second": 20.325, |
|
"eval_steps_per_second": 5.153, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 12.04, |
|
"learning_rate": 1.2097222222222223e-05, |
|
"loss": 0.9743, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 1.0708333333333334e-05, |
|
"loss": 0.9362, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_gen_len": 19.6338, |
|
"eval_loss": 1.867732286453247, |
|
"eval_rouge1": 38.0371, |
|
"eval_rouge2": 19.2321, |
|
"eval_rougeL": 30.3893, |
|
"eval_rougeLsum": 30.3926, |
|
"eval_runtime": 3.4487, |
|
"eval_samples_per_second": 20.588, |
|
"eval_steps_per_second": 5.219, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"learning_rate": 9.319444444444445e-06, |
|
"loss": 0.8868, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_gen_len": 19.6479, |
|
"eval_loss": 1.9153633117675781, |
|
"eval_rouge1": 36.4737, |
|
"eval_rouge2": 18.5314, |
|
"eval_rougeL": 29.325, |
|
"eval_rougeLsum": 29.3634, |
|
"eval_runtime": 3.4736, |
|
"eval_samples_per_second": 20.44, |
|
"eval_steps_per_second": 5.182, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 7.930555555555556e-06, |
|
"loss": 0.8335, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_gen_len": 19.8028, |
|
"eval_loss": 1.93436598777771, |
|
"eval_rouge1": 35.7583, |
|
"eval_rouge2": 18.0687, |
|
"eval_rougeL": 27.9666, |
|
"eval_rougeLsum": 27.8675, |
|
"eval_runtime": 3.3929, |
|
"eval_samples_per_second": 20.926, |
|
"eval_steps_per_second": 5.305, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 15.74, |
|
"learning_rate": 6.541666666666667e-06, |
|
"loss": 0.8305, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_gen_len": 19.9577, |
|
"eval_loss": 1.9556257724761963, |
|
"eval_rouge1": 37.2137, |
|
"eval_rouge2": 18.2199, |
|
"eval_rougeL": 29.5959, |
|
"eval_rougeLsum": 29.5799, |
|
"eval_runtime": 3.4517, |
|
"eval_samples_per_second": 20.57, |
|
"eval_steps_per_second": 5.215, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 5.152777777777778e-06, |
|
"loss": 0.8057, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_gen_len": 19.7324, |
|
"eval_loss": 1.9793369770050049, |
|
"eval_rouge1": 36.6834, |
|
"eval_rouge2": 17.8505, |
|
"eval_rougeL": 28.6701, |
|
"eval_rougeLsum": 28.7145, |
|
"eval_runtime": 3.4482, |
|
"eval_samples_per_second": 20.59, |
|
"eval_steps_per_second": 5.22, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 17.59, |
|
"learning_rate": 3.763888888888889e-06, |
|
"loss": 0.7869, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_gen_len": 19.7606, |
|
"eval_loss": 1.9994447231292725, |
|
"eval_rouge1": 37.5918, |
|
"eval_rouge2": 19.1984, |
|
"eval_rougeL": 28.8569, |
|
"eval_rougeLsum": 28.8278, |
|
"eval_runtime": 3.4143, |
|
"eval_samples_per_second": 20.795, |
|
"eval_steps_per_second": 5.272, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"learning_rate": 2.375e-06, |
|
"loss": 0.7549, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_gen_len": 19.8028, |
|
"eval_loss": 2.011744737625122, |
|
"eval_rouge1": 37.3278, |
|
"eval_rouge2": 18.5169, |
|
"eval_rougeL": 28.778, |
|
"eval_rougeLsum": 28.7737, |
|
"eval_runtime": 3.4309, |
|
"eval_samples_per_second": 20.694, |
|
"eval_steps_per_second": 5.246, |
|
"step": 2052 |
|
}, |
|
{ |
|
"epoch": 19.44, |
|
"learning_rate": 9.861111111111112e-07, |
|
"loss": 0.7497, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_gen_len": 19.6901, |
|
"eval_loss": 2.018871784210205, |
|
"eval_rouge1": 37.7513, |
|
"eval_rouge2": 19.1813, |
|
"eval_rougeL": 29.3675, |
|
"eval_rougeLsum": 29.402, |
|
"eval_runtime": 3.4324, |
|
"eval_samples_per_second": 20.685, |
|
"eval_steps_per_second": 5.244, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2160, |
|
"total_flos": 1555682356666368.0, |
|
"train_loss": 1.2554297270598236, |
|
"train_runtime": 336.8512, |
|
"train_samples_per_second": 25.471, |
|
"train_steps_per_second": 6.412 |
|
} |
|
], |
|
"max_steps": 2160, |
|
"num_train_epochs": 20, |
|
"total_flos": 1555682356666368.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|