|
{ |
|
"best_metric": 2.0710394382476807, |
|
"best_model_checkpoint": "./checkpoints/barthez-deft-chimie/checkpoint-1062", |
|
"epoch": 20.0, |
|
"global_step": 2360, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.8805084745762714e-05, |
|
"loss": 3.8022, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 19.1538, |
|
"eval_loss": 2.5490684509277344, |
|
"eval_rouge1": 16.8208, |
|
"eval_rouge2": 7.0027, |
|
"eval_rougeL": 13.957, |
|
"eval_rougeLsum": 14.0479, |
|
"eval_runtime": 3.9642, |
|
"eval_samples_per_second": 19.676, |
|
"eval_steps_per_second": 5.045, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.7533898305084746e-05, |
|
"loss": 2.9286, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 19.9487, |
|
"eval_loss": 2.3073835372924805, |
|
"eval_rouge1": 17.5356, |
|
"eval_rouge2": 7.8717, |
|
"eval_rougeL": 14.4874, |
|
"eval_rougeLsum": 14.5044, |
|
"eval_runtime": 3.9919, |
|
"eval_samples_per_second": 19.54, |
|
"eval_steps_per_second": 5.01, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.627542372881356e-05, |
|
"loss": 2.5422, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 19.7051, |
|
"eval_loss": 2.232180118560791, |
|
"eval_rouge1": 19.6491, |
|
"eval_rouge2": 9.4156, |
|
"eval_rougeL": 15.9467, |
|
"eval_rougeLsum": 15.9433, |
|
"eval_runtime": 3.9423, |
|
"eval_samples_per_second": 19.785, |
|
"eval_steps_per_second": 5.073, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 2.5004237288135596e-05, |
|
"loss": 2.398, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 19.9231, |
|
"eval_loss": 2.150047540664673, |
|
"eval_rouge1": 18.7166, |
|
"eval_rouge2": 9.859, |
|
"eval_rougeL": 15.7535, |
|
"eval_rougeLsum": 15.8036, |
|
"eval_runtime": 3.904, |
|
"eval_samples_per_second": 19.98, |
|
"eval_steps_per_second": 5.123, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.373305084745763e-05, |
|
"loss": 2.2044, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 19.6154, |
|
"eval_loss": 2.1371994018554688, |
|
"eval_rouge1": 19.978, |
|
"eval_rouge2": 10.6235, |
|
"eval_rougeL": 16.1348, |
|
"eval_rougeLsum": 16.1274, |
|
"eval_runtime": 3.9032, |
|
"eval_samples_per_second": 19.984, |
|
"eval_steps_per_second": 5.124, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 2.2461864406779662e-05, |
|
"loss": 2.0419, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 2.1203389830508474e-05, |
|
"loss": 1.9405, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 19.9744, |
|
"eval_loss": 2.099212884902954, |
|
"eval_rouge1": 20.226, |
|
"eval_rouge2": 10.551, |
|
"eval_rougeL": 16.6928, |
|
"eval_rougeLsum": 16.7211, |
|
"eval_runtime": 3.9012, |
|
"eval_samples_per_second": 19.994, |
|
"eval_steps_per_second": 5.127, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 1.993220338983051e-05, |
|
"loss": 1.8544, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_gen_len": 19.8846, |
|
"eval_loss": 2.0841410160064697, |
|
"eval_rouge1": 19.8869, |
|
"eval_rouge2": 10.8456, |
|
"eval_rougeL": 16.1072, |
|
"eval_rougeLsum": 16.097, |
|
"eval_runtime": 3.8835, |
|
"eval_samples_per_second": 20.085, |
|
"eval_steps_per_second": 5.15, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 1.866101694915254e-05, |
|
"loss": 1.7536, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 19.859, |
|
"eval_loss": 2.0790863037109375, |
|
"eval_rouge1": 19.3017, |
|
"eval_rouge2": 9.4921, |
|
"eval_rougeL": 16.1541, |
|
"eval_rougeLsum": 16.2167, |
|
"eval_runtime": 3.9024, |
|
"eval_samples_per_second": 19.988, |
|
"eval_steps_per_second": 5.125, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 1.7389830508474576e-05, |
|
"loss": 1.6914, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_gen_len": 19.8846, |
|
"eval_loss": 2.0710394382476807, |
|
"eval_rouge1": 21.3848, |
|
"eval_rouge2": 10.4088, |
|
"eval_rougeL": 17.1963, |
|
"eval_rougeLsum": 17.2254, |
|
"eval_runtime": 3.8762, |
|
"eval_samples_per_second": 20.123, |
|
"eval_steps_per_second": 5.16, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 1.611864406779661e-05, |
|
"loss": 1.654, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_gen_len": 19.9231, |
|
"eval_loss": 2.106861114501953, |
|
"eval_rouge1": 22.3811, |
|
"eval_rouge2": 10.7987, |
|
"eval_rougeL": 18.7595, |
|
"eval_rougeLsum": 18.761, |
|
"eval_runtime": 3.9276, |
|
"eval_samples_per_second": 19.859, |
|
"eval_steps_per_second": 5.092, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 1.4847457627118646e-05, |
|
"loss": 1.5899, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_gen_len": 19.8077, |
|
"eval_loss": 2.09187650680542, |
|
"eval_rouge1": 20.8546, |
|
"eval_rouge2": 10.6958, |
|
"eval_rougeL": 16.8637, |
|
"eval_rougeLsum": 16.9499, |
|
"eval_runtime": 3.8954, |
|
"eval_samples_per_second": 20.024, |
|
"eval_steps_per_second": 5.134, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 1.357627118644068e-05, |
|
"loss": 1.534, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 1.2305084745762711e-05, |
|
"loss": 1.4661, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_gen_len": 19.9744, |
|
"eval_loss": 2.106508255004883, |
|
"eval_rouge1": 22.3677, |
|
"eval_rouge2": 11.7472, |
|
"eval_rougeL": 18.262, |
|
"eval_rougeLsum": 18.3, |
|
"eval_runtime": 3.8544, |
|
"eval_samples_per_second": 20.237, |
|
"eval_steps_per_second": 5.189, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 1.1033898305084746e-05, |
|
"loss": 1.4205, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_gen_len": 19.9359, |
|
"eval_loss": 2.116386651992798, |
|
"eval_rouge1": 20.5845, |
|
"eval_rouge2": 10.7825, |
|
"eval_rougeL": 16.9972, |
|
"eval_rougeLsum": 17.0216, |
|
"eval_runtime": 3.9246, |
|
"eval_samples_per_second": 19.875, |
|
"eval_steps_per_second": 5.096, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 9.76271186440678e-06, |
|
"loss": 1.3797, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_gen_len": 19.9744, |
|
"eval_loss": 2.124025344848633, |
|
"eval_rouge1": 22.2561, |
|
"eval_rouge2": 11.303, |
|
"eval_rougeL": 17.5064, |
|
"eval_rougeLsum": 17.5815, |
|
"eval_runtime": 3.8828, |
|
"eval_samples_per_second": 20.089, |
|
"eval_steps_per_second": 5.151, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 8.491525423728813e-06, |
|
"loss": 1.3724, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_gen_len": 19.9359, |
|
"eval_loss": 2.118665933609009, |
|
"eval_rouge1": 23.2825, |
|
"eval_rouge2": 11.912, |
|
"eval_rougeL": 18.5208, |
|
"eval_rougeLsum": 18.5499, |
|
"eval_runtime": 3.9164, |
|
"eval_samples_per_second": 19.916, |
|
"eval_steps_per_second": 5.107, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 7.220338983050848e-06, |
|
"loss": 1.3404, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_gen_len": 19.9744, |
|
"eval_loss": 2.139371395111084, |
|
"eval_rouge1": 22.1305, |
|
"eval_rouge2": 10.5258, |
|
"eval_rougeL": 17.772, |
|
"eval_rougeLsum": 17.8202, |
|
"eval_runtime": 3.9181, |
|
"eval_samples_per_second": 19.908, |
|
"eval_steps_per_second": 5.105, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 5.949152542372881e-06, |
|
"loss": 1.2967, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 4.677966101694916e-06, |
|
"loss": 1.2846, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_gen_len": 20.0, |
|
"eval_loss": 2.1501927375793457, |
|
"eval_rouge1": 21.567, |
|
"eval_rouge2": 11.0557, |
|
"eval_rougeL": 17.2562, |
|
"eval_rougeLsum": 17.2974, |
|
"eval_runtime": 3.8698, |
|
"eval_samples_per_second": 20.156, |
|
"eval_steps_per_second": 5.168, |
|
"step": 2006 |
|
}, |
|
{ |
|
"epoch": 17.8, |
|
"learning_rate": 3.406779661016949e-06, |
|
"loss": 1.2871, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_gen_len": 19.9744, |
|
"eval_loss": 2.1571571826934814, |
|
"eval_rouge1": 22.5871, |
|
"eval_rouge2": 11.702, |
|
"eval_rougeL": 18.2906, |
|
"eval_rougeLsum": 18.3826, |
|
"eval_runtime": 3.8649, |
|
"eval_samples_per_second": 20.182, |
|
"eval_steps_per_second": 5.175, |
|
"step": 2124 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 2.1355932203389833e-06, |
|
"loss": 1.2422, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_gen_len": 19.9744, |
|
"eval_loss": 2.161255359649658, |
|
"eval_rouge1": 23.0935, |
|
"eval_rouge2": 11.6824, |
|
"eval_rougeL": 18.6087, |
|
"eval_rougeLsum": 18.6777, |
|
"eval_runtime": 3.8614, |
|
"eval_samples_per_second": 20.2, |
|
"eval_steps_per_second": 5.18, |
|
"step": 2242 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"learning_rate": 8.64406779661017e-07, |
|
"loss": 1.2336, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_gen_len": 19.9487, |
|
"eval_loss": 2.158088445663452, |
|
"eval_rouge1": 22.6789, |
|
"eval_rouge2": 11.4363, |
|
"eval_rougeL": 18.1661, |
|
"eval_rougeLsum": 18.2346, |
|
"eval_runtime": 3.8624, |
|
"eval_samples_per_second": 20.195, |
|
"eval_steps_per_second": 5.178, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2360, |
|
"total_flos": 1837511390490624.0, |
|
"train_loss": 1.7793180174746757, |
|
"train_runtime": 373.3963, |
|
"train_samples_per_second": 25.121, |
|
"train_steps_per_second": 6.32 |
|
} |
|
], |
|
"max_steps": 2360, |
|
"num_train_epochs": 20, |
|
"total_flos": 1837511390490624.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|