|
{ |
|
"best_metric": 2.237640380859375, |
|
"best_model_checkpoint": "/exports/eddie/scratch/s1970716/models/summarization/longt5_xl_summ_screen_bp_only_30/checkpoint-14", |
|
"epoch": 14.608695652173914, |
|
"eval_steps": 500, |
|
"global_step": 210, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3076, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3217, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3519, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3449, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3197, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0005, |
|
"loss": 0.3342, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0005, |
|
"loss": 0.324, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_gen_len": 246.7396449704142, |
|
"eval_loss": 2.237640380859375, |
|
"eval_rouge1": 40.4388, |
|
"eval_rouge2": 16.4662, |
|
"eval_rougeL": 28.0771, |
|
"eval_rougeLsum": 38.3405, |
|
"eval_runtime": 1709.2671, |
|
"eval_samples_per_second": 0.198, |
|
"eval_steps_per_second": 0.025, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2825, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2835, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2732, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2928, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2912, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2914, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2707, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_gen_len": 307.3786982248521, |
|
"eval_loss": 2.320437431335449, |
|
"eval_rouge1": 40.2873, |
|
"eval_rouge2": 16.7641, |
|
"eval_rougeL": 27.3895, |
|
"eval_rougeLsum": 38.2689, |
|
"eval_runtime": 1775.692, |
|
"eval_samples_per_second": 0.19, |
|
"eval_steps_per_second": 0.024, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2342, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2326, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2334, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2212, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2161, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2236, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2217, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_gen_len": 501.93195266272187, |
|
"eval_loss": 2.5281190872192383, |
|
"eval_rouge1": 31.9916, |
|
"eval_rouge2": 13.8136, |
|
"eval_rougeL": 22.1895, |
|
"eval_rougeLsum": 30.623, |
|
"eval_runtime": 1780.5368, |
|
"eval_samples_per_second": 0.19, |
|
"eval_steps_per_second": 0.024, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.0005, |
|
"loss": 0.2055, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1943, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1875, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1909, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1881, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1541, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1776, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_gen_len": 489.6183431952663, |
|
"eval_loss": 2.7530014514923096, |
|
"eval_rouge1": 31.7535, |
|
"eval_rouge2": 13.8852, |
|
"eval_rougeL": 22.8653, |
|
"eval_rougeLsum": 30.3796, |
|
"eval_runtime": 1780.5033, |
|
"eval_samples_per_second": 0.19, |
|
"eval_steps_per_second": 0.024, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1876, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1589, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1529, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1464, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1689, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1492, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1424, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_gen_len": 502.11242603550295, |
|
"eval_loss": 2.657783031463623, |
|
"eval_rouge1": 32.117, |
|
"eval_rouge2": 14.2141, |
|
"eval_rougeL": 22.3733, |
|
"eval_rougeLsum": 30.8328, |
|
"eval_runtime": 1780.0926, |
|
"eval_samples_per_second": 0.19, |
|
"eval_steps_per_second": 0.024, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1494, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.0005, |
|
"loss": 0.118, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1499, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1369, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1084, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.0005, |
|
"loss": 0.117, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.0005, |
|
"loss": 0.144, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1449, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"eval_gen_len": 357.31360946745565, |
|
"eval_loss": 2.5508346557617188, |
|
"eval_rouge1": 35.3448, |
|
"eval_rouge2": 13.8478, |
|
"eval_rougeL": 24.9044, |
|
"eval_rougeLsum": 33.6108, |
|
"eval_runtime": 1768.966, |
|
"eval_samples_per_second": 0.191, |
|
"eval_steps_per_second": 0.024, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1101, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0985, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1101, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1013, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1057, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1102, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1191, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"eval_gen_len": 408.86686390532543, |
|
"eval_loss": 3.162177324295044, |
|
"eval_rouge1": 37.2189, |
|
"eval_rouge2": 16.0076, |
|
"eval_rougeL": 25.7011, |
|
"eval_rougeLsum": 35.294, |
|
"eval_runtime": 1778.8704, |
|
"eval_samples_per_second": 0.19, |
|
"eval_steps_per_second": 0.024, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0949, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0974, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.0005, |
|
"loss": 0.096, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1023, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0829, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0879, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 318.2278106508876, |
|
"eval_loss": 2.8510310649871826, |
|
"eval_rouge1": 39.8825, |
|
"eval_rouge2": 16.8073, |
|
"eval_rougeL": 27.2428, |
|
"eval_rougeLsum": 37.9568, |
|
"eval_runtime": 1776.4341, |
|
"eval_samples_per_second": 0.19, |
|
"eval_steps_per_second": 0.024, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0957, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0991, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 0.0005, |
|
"loss": 0.095, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0999, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0787, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0716, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0899, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"eval_gen_len": 500.405325443787, |
|
"eval_loss": 2.9137675762176514, |
|
"eval_rouge1": 31.7139, |
|
"eval_rouge2": 13.7066, |
|
"eval_rougeL": 21.8844, |
|
"eval_rougeLsum": 30.5075, |
|
"eval_runtime": 1780.3113, |
|
"eval_samples_per_second": 0.19, |
|
"eval_steps_per_second": 0.024, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0945, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0597, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0586, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0757, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0665, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0594, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0656, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"eval_gen_len": 488.1686390532544, |
|
"eval_loss": 3.1616334915161133, |
|
"eval_rouge1": 33.055, |
|
"eval_rouge2": 14.5841, |
|
"eval_rougeL": 22.5883, |
|
"eval_rougeLsum": 31.7565, |
|
"eval_runtime": 1782.2406, |
|
"eval_samples_per_second": 0.19, |
|
"eval_steps_per_second": 0.024, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0613, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0621, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0739, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0841, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0562, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0611, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0607, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0542, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_gen_len": 198.80769230769232, |
|
"eval_loss": 3.3630056381225586, |
|
"eval_rouge1": 43.7514, |
|
"eval_rouge2": 18.9011, |
|
"eval_rougeL": 29.9017, |
|
"eval_rougeLsum": 41.6887, |
|
"eval_runtime": 1460.3937, |
|
"eval_samples_per_second": 0.231, |
|
"eval_steps_per_second": 0.029, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0548, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 11.27, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0477, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"learning_rate": 0.0005, |
|
"loss": 0.052, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 0.0005, |
|
"loss": 0.053, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 11.69, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0525, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 11.83, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0555, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0557, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"eval_gen_len": 270.9674556213018, |
|
"eval_loss": 3.3825831413269043, |
|
"eval_rouge1": 42.3089, |
|
"eval_rouge2": 18.2735, |
|
"eval_rougeL": 29.0356, |
|
"eval_rougeLsum": 40.4154, |
|
"eval_runtime": 1704.6973, |
|
"eval_samples_per_second": 0.198, |
|
"eval_steps_per_second": 0.025, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0513, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0419, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 12.38, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0525, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0535, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 12.66, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0611, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0444, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 12.94, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0542, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 12.94, |
|
"eval_gen_len": 186.73076923076923, |
|
"eval_loss": 3.4408490657806396, |
|
"eval_rouge1": 40.7691, |
|
"eval_rouge2": 16.529, |
|
"eval_rougeL": 28.3999, |
|
"eval_rougeLsum": 38.9723, |
|
"eval_runtime": 1525.6668, |
|
"eval_samples_per_second": 0.222, |
|
"eval_steps_per_second": 0.028, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 13.08, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0602, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0438, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 13.36, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0503, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 0.0005, |
|
"loss": 0.046, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 13.63, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0368, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0572, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0596, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"eval_gen_len": 398.4704142011834, |
|
"eval_loss": 3.525272846221924, |
|
"eval_rouge1": 37.0037, |
|
"eval_rouge2": 15.9098, |
|
"eval_rougeL": 25.2808, |
|
"eval_rougeLsum": 35.3868, |
|
"eval_runtime": 1778.3289, |
|
"eval_samples_per_second": 0.19, |
|
"eval_steps_per_second": 0.024, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0434, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0453, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 14.33, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0453, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 14.47, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0586, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"learning_rate": 0.0005, |
|
"loss": 0.0385, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"eval_gen_len": 499.31065088757396, |
|
"eval_loss": 3.498972177505493, |
|
"eval_rouge1": 32.5815, |
|
"eval_rouge2": 14.2951, |
|
"eval_rougeL": 22.4501, |
|
"eval_rougeLsum": 31.2928, |
|
"eval_runtime": 1779.9602, |
|
"eval_samples_per_second": 0.19, |
|
"eval_steps_per_second": 0.024, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"step": 210, |
|
"total_flos": 3.6715371967648973e+18, |
|
"train_loss": 0.13047422234501158, |
|
"train_runtime": 78176.4332, |
|
"train_samples_per_second": 0.705, |
|
"train_steps_per_second": 0.003 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 210, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"total_flos": 3.6715371967648973e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|