barthez-deft-chimie / trainer_state.json
José Ángel González
add model
494a76a
raw
history blame
10.5 kB
{
"best_metric": 2.0710394382476807,
"best_model_checkpoint": "./checkpoints/barthez-deft-chimie/checkpoint-1062",
"epoch": 20.0,
"global_step": 2360,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.85,
"learning_rate": 2.8805084745762714e-05,
"loss": 3.8022,
"step": 100
},
{
"epoch": 1.0,
"eval_gen_len": 19.1538,
"eval_loss": 2.5490684509277344,
"eval_rouge1": 16.8208,
"eval_rouge2": 7.0027,
"eval_rougeL": 13.957,
"eval_rougeLsum": 14.0479,
"eval_runtime": 3.9642,
"eval_samples_per_second": 19.676,
"eval_steps_per_second": 5.045,
"step": 118
},
{
"epoch": 1.69,
"learning_rate": 2.7533898305084746e-05,
"loss": 2.9286,
"step": 200
},
{
"epoch": 2.0,
"eval_gen_len": 19.9487,
"eval_loss": 2.3073835372924805,
"eval_rouge1": 17.5356,
"eval_rouge2": 7.8717,
"eval_rougeL": 14.4874,
"eval_rougeLsum": 14.5044,
"eval_runtime": 3.9919,
"eval_samples_per_second": 19.54,
"eval_steps_per_second": 5.01,
"step": 236
},
{
"epoch": 2.54,
"learning_rate": 2.627542372881356e-05,
"loss": 2.5422,
"step": 300
},
{
"epoch": 3.0,
"eval_gen_len": 19.7051,
"eval_loss": 2.232180118560791,
"eval_rouge1": 19.6491,
"eval_rouge2": 9.4156,
"eval_rougeL": 15.9467,
"eval_rougeLsum": 15.9433,
"eval_runtime": 3.9423,
"eval_samples_per_second": 19.785,
"eval_steps_per_second": 5.073,
"step": 354
},
{
"epoch": 3.39,
"learning_rate": 2.5004237288135596e-05,
"loss": 2.398,
"step": 400
},
{
"epoch": 4.0,
"eval_gen_len": 19.9231,
"eval_loss": 2.150047540664673,
"eval_rouge1": 18.7166,
"eval_rouge2": 9.859,
"eval_rougeL": 15.7535,
"eval_rougeLsum": 15.8036,
"eval_runtime": 3.904,
"eval_samples_per_second": 19.98,
"eval_steps_per_second": 5.123,
"step": 472
},
{
"epoch": 4.24,
"learning_rate": 2.373305084745763e-05,
"loss": 2.2044,
"step": 500
},
{
"epoch": 5.0,
"eval_gen_len": 19.6154,
"eval_loss": 2.1371994018554688,
"eval_rouge1": 19.978,
"eval_rouge2": 10.6235,
"eval_rougeL": 16.1348,
"eval_rougeLsum": 16.1274,
"eval_runtime": 3.9032,
"eval_samples_per_second": 19.984,
"eval_steps_per_second": 5.124,
"step": 590
},
{
"epoch": 5.08,
"learning_rate": 2.2461864406779662e-05,
"loss": 2.0419,
"step": 600
},
{
"epoch": 5.93,
"learning_rate": 2.1203389830508474e-05,
"loss": 1.9405,
"step": 700
},
{
"epoch": 6.0,
"eval_gen_len": 19.9744,
"eval_loss": 2.099212884902954,
"eval_rouge1": 20.226,
"eval_rouge2": 10.551,
"eval_rougeL": 16.6928,
"eval_rougeLsum": 16.7211,
"eval_runtime": 3.9012,
"eval_samples_per_second": 19.994,
"eval_steps_per_second": 5.127,
"step": 708
},
{
"epoch": 6.78,
"learning_rate": 1.993220338983051e-05,
"loss": 1.8544,
"step": 800
},
{
"epoch": 7.0,
"eval_gen_len": 19.8846,
"eval_loss": 2.0841410160064697,
"eval_rouge1": 19.8869,
"eval_rouge2": 10.8456,
"eval_rougeL": 16.1072,
"eval_rougeLsum": 16.097,
"eval_runtime": 3.8835,
"eval_samples_per_second": 20.085,
"eval_steps_per_second": 5.15,
"step": 826
},
{
"epoch": 7.63,
"learning_rate": 1.866101694915254e-05,
"loss": 1.7536,
"step": 900
},
{
"epoch": 8.0,
"eval_gen_len": 19.859,
"eval_loss": 2.0790863037109375,
"eval_rouge1": 19.3017,
"eval_rouge2": 9.4921,
"eval_rougeL": 16.1541,
"eval_rougeLsum": 16.2167,
"eval_runtime": 3.9024,
"eval_samples_per_second": 19.988,
"eval_steps_per_second": 5.125,
"step": 944
},
{
"epoch": 8.47,
"learning_rate": 1.7389830508474576e-05,
"loss": 1.6914,
"step": 1000
},
{
"epoch": 9.0,
"eval_gen_len": 19.8846,
"eval_loss": 2.0710394382476807,
"eval_rouge1": 21.3848,
"eval_rouge2": 10.4088,
"eval_rougeL": 17.1963,
"eval_rougeLsum": 17.2254,
"eval_runtime": 3.8762,
"eval_samples_per_second": 20.123,
"eval_steps_per_second": 5.16,
"step": 1062
},
{
"epoch": 9.32,
"learning_rate": 1.611864406779661e-05,
"loss": 1.654,
"step": 1100
},
{
"epoch": 10.0,
"eval_gen_len": 19.9231,
"eval_loss": 2.106861114501953,
"eval_rouge1": 22.3811,
"eval_rouge2": 10.7987,
"eval_rougeL": 18.7595,
"eval_rougeLsum": 18.761,
"eval_runtime": 3.9276,
"eval_samples_per_second": 19.859,
"eval_steps_per_second": 5.092,
"step": 1180
},
{
"epoch": 10.17,
"learning_rate": 1.4847457627118646e-05,
"loss": 1.5899,
"step": 1200
},
{
"epoch": 11.0,
"eval_gen_len": 19.8077,
"eval_loss": 2.09187650680542,
"eval_rouge1": 20.8546,
"eval_rouge2": 10.6958,
"eval_rougeL": 16.8637,
"eval_rougeLsum": 16.9499,
"eval_runtime": 3.8954,
"eval_samples_per_second": 20.024,
"eval_steps_per_second": 5.134,
"step": 1298
},
{
"epoch": 11.02,
"learning_rate": 1.357627118644068e-05,
"loss": 1.534,
"step": 1300
},
{
"epoch": 11.86,
"learning_rate": 1.2305084745762711e-05,
"loss": 1.4661,
"step": 1400
},
{
"epoch": 12.0,
"eval_gen_len": 19.9744,
"eval_loss": 2.106508255004883,
"eval_rouge1": 22.3677,
"eval_rouge2": 11.7472,
"eval_rougeL": 18.262,
"eval_rougeLsum": 18.3,
"eval_runtime": 3.8544,
"eval_samples_per_second": 20.237,
"eval_steps_per_second": 5.189,
"step": 1416
},
{
"epoch": 12.71,
"learning_rate": 1.1033898305084746e-05,
"loss": 1.4205,
"step": 1500
},
{
"epoch": 13.0,
"eval_gen_len": 19.9359,
"eval_loss": 2.116386651992798,
"eval_rouge1": 20.5845,
"eval_rouge2": 10.7825,
"eval_rougeL": 16.9972,
"eval_rougeLsum": 17.0216,
"eval_runtime": 3.9246,
"eval_samples_per_second": 19.875,
"eval_steps_per_second": 5.096,
"step": 1534
},
{
"epoch": 13.56,
"learning_rate": 9.76271186440678e-06,
"loss": 1.3797,
"step": 1600
},
{
"epoch": 14.0,
"eval_gen_len": 19.9744,
"eval_loss": 2.124025344848633,
"eval_rouge1": 22.2561,
"eval_rouge2": 11.303,
"eval_rougeL": 17.5064,
"eval_rougeLsum": 17.5815,
"eval_runtime": 3.8828,
"eval_samples_per_second": 20.089,
"eval_steps_per_second": 5.151,
"step": 1652
},
{
"epoch": 14.41,
"learning_rate": 8.491525423728813e-06,
"loss": 1.3724,
"step": 1700
},
{
"epoch": 15.0,
"eval_gen_len": 19.9359,
"eval_loss": 2.118665933609009,
"eval_rouge1": 23.2825,
"eval_rouge2": 11.912,
"eval_rougeL": 18.5208,
"eval_rougeLsum": 18.5499,
"eval_runtime": 3.9164,
"eval_samples_per_second": 19.916,
"eval_steps_per_second": 5.107,
"step": 1770
},
{
"epoch": 15.25,
"learning_rate": 7.220338983050848e-06,
"loss": 1.3404,
"step": 1800
},
{
"epoch": 16.0,
"eval_gen_len": 19.9744,
"eval_loss": 2.139371395111084,
"eval_rouge1": 22.1305,
"eval_rouge2": 10.5258,
"eval_rougeL": 17.772,
"eval_rougeLsum": 17.8202,
"eval_runtime": 3.9181,
"eval_samples_per_second": 19.908,
"eval_steps_per_second": 5.105,
"step": 1888
},
{
"epoch": 16.1,
"learning_rate": 5.949152542372881e-06,
"loss": 1.2967,
"step": 1900
},
{
"epoch": 16.95,
"learning_rate": 4.677966101694916e-06,
"loss": 1.2846,
"step": 2000
},
{
"epoch": 17.0,
"eval_gen_len": 20.0,
"eval_loss": 2.1501927375793457,
"eval_rouge1": 21.567,
"eval_rouge2": 11.0557,
"eval_rougeL": 17.2562,
"eval_rougeLsum": 17.2974,
"eval_runtime": 3.8698,
"eval_samples_per_second": 20.156,
"eval_steps_per_second": 5.168,
"step": 2006
},
{
"epoch": 17.8,
"learning_rate": 3.406779661016949e-06,
"loss": 1.2871,
"step": 2100
},
{
"epoch": 18.0,
"eval_gen_len": 19.9744,
"eval_loss": 2.1571571826934814,
"eval_rouge1": 22.5871,
"eval_rouge2": 11.702,
"eval_rougeL": 18.2906,
"eval_rougeLsum": 18.3826,
"eval_runtime": 3.8649,
"eval_samples_per_second": 20.182,
"eval_steps_per_second": 5.175,
"step": 2124
},
{
"epoch": 18.64,
"learning_rate": 2.1355932203389833e-06,
"loss": 1.2422,
"step": 2200
},
{
"epoch": 19.0,
"eval_gen_len": 19.9744,
"eval_loss": 2.161255359649658,
"eval_rouge1": 23.0935,
"eval_rouge2": 11.6824,
"eval_rougeL": 18.6087,
"eval_rougeLsum": 18.6777,
"eval_runtime": 3.8614,
"eval_samples_per_second": 20.2,
"eval_steps_per_second": 5.18,
"step": 2242
},
{
"epoch": 19.49,
"learning_rate": 8.64406779661017e-07,
"loss": 1.2336,
"step": 2300
},
{
"epoch": 20.0,
"eval_gen_len": 19.9487,
"eval_loss": 2.158088445663452,
"eval_rouge1": 22.6789,
"eval_rouge2": 11.4363,
"eval_rougeL": 18.1661,
"eval_rougeLsum": 18.2346,
"eval_runtime": 3.8624,
"eval_samples_per_second": 20.195,
"eval_steps_per_second": 5.178,
"step": 2360
},
{
"epoch": 20.0,
"step": 2360,
"total_flos": 1837511390490624.0,
"train_loss": 1.7793180174746757,
"train_runtime": 373.3963,
"train_samples_per_second": 25.121,
"train_steps_per_second": 6.32
}
],
"max_steps": 2360,
"num_train_epochs": 20,
"total_flos": 1837511390490624.0,
"trial_name": null,
"trial_params": null
}