|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 107670, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9953561809231916e-05, |
|
"loss": 2.0977, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.990712361846382e-05, |
|
"loss": 2.0174, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.986068542769574e-05, |
|
"loss": 1.9844, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9814247236927655e-05, |
|
"loss": 1.9164, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.976780904615956e-05, |
|
"loss": 1.9451, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9721370855391474e-05, |
|
"loss": 1.8884, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.967493266462339e-05, |
|
"loss": 1.9468, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.96284944738553e-05, |
|
"loss": 1.9071, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9582056283087214e-05, |
|
"loss": 1.9196, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.953561809231913e-05, |
|
"loss": 1.9623, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.7499970197677612, |
|
"eval_rouge1": 24.1039, |
|
"eval_rouge2": 11.368, |
|
"eval_rougeL": 19.813, |
|
"eval_rougeLsum": 22.671, |
|
"eval_runtime": 1022.1998, |
|
"eval_samples_per_second": 13.078, |
|
"eval_steps_per_second": 1.635, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.948917990155103e-05, |
|
"loss": 1.9252, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.944274171078295e-05, |
|
"loss": 1.9163, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9396303520014866e-05, |
|
"loss": 1.9168, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.934986532924677e-05, |
|
"loss": 1.9531, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9303427138478686e-05, |
|
"loss": 1.9297, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.92569889477106e-05, |
|
"loss": 1.9387, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.921055075694251e-05, |
|
"loss": 1.9241, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9164112566174425e-05, |
|
"loss": 1.8848, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.911767437540634e-05, |
|
"loss": 1.8753, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9071236184638245e-05, |
|
"loss": 1.8827, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_gen_len": 18.999326750448834, |
|
"eval_loss": 1.7382432222366333, |
|
"eval_rouge1": 24.1445, |
|
"eval_rouge2": 11.4497, |
|
"eval_rougeL": 19.8683, |
|
"eval_rougeLsum": 22.7376, |
|
"eval_runtime": 1035.6817, |
|
"eval_samples_per_second": 12.907, |
|
"eval_steps_per_second": 1.613, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.902479799387016e-05, |
|
"loss": 1.881, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.897835980310208e-05, |
|
"loss": 1.9097, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8931921612333984e-05, |
|
"loss": 1.9231, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.88854834215659e-05, |
|
"loss": 1.9248, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.883904523079781e-05, |
|
"loss": 1.8999, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8792607040029724e-05, |
|
"loss": 1.9033, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.874616884926164e-05, |
|
"loss": 1.8797, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.869973065849355e-05, |
|
"loss": 1.8714, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8653292467725456e-05, |
|
"loss": 1.9069, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.860685427695737e-05, |
|
"loss": 1.8988, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_gen_len": 18.99985038898863, |
|
"eval_loss": 1.7309753894805908, |
|
"eval_rouge1": 24.329, |
|
"eval_rouge2": 11.5899, |
|
"eval_rougeL": 20.0409, |
|
"eval_rougeLsum": 22.9104, |
|
"eval_runtime": 1044.308, |
|
"eval_samples_per_second": 12.801, |
|
"eval_steps_per_second": 1.6, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.856041608618929e-05, |
|
"loss": 1.9302, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.8513977895421196e-05, |
|
"loss": 1.8743, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.846753970465311e-05, |
|
"loss": 1.9361, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.842110151388502e-05, |
|
"loss": 1.8986, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.837466332311693e-05, |
|
"loss": 1.8377, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.832822513234885e-05, |
|
"loss": 1.8887, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.828178694158076e-05, |
|
"loss": 1.9125, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.823534875081267e-05, |
|
"loss": 1.8511, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.818891056004458e-05, |
|
"loss": 1.8827, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.81424723692765e-05, |
|
"loss": 1.8778, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.7176508903503418, |
|
"eval_rouge1": 24.3886, |
|
"eval_rouge2": 11.6472, |
|
"eval_rougeL": 20.1048, |
|
"eval_rougeLsum": 22.988, |
|
"eval_runtime": 1029.4955, |
|
"eval_samples_per_second": 12.985, |
|
"eval_steps_per_second": 1.623, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.809603417850841e-05, |
|
"loss": 1.9031, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.804959598774032e-05, |
|
"loss": 1.8762, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.800315779697223e-05, |
|
"loss": 1.926, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.795671960620414e-05, |
|
"loss": 1.8523, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.791028141543606e-05, |
|
"loss": 1.9052, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.786384322466797e-05, |
|
"loss": 1.8762, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.781740503389988e-05, |
|
"loss": 1.9174, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.777096684313179e-05, |
|
"loss": 1.8726, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7724528652363705e-05, |
|
"loss": 1.8676, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.767809046159562e-05, |
|
"loss": 1.9173, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.7139595746994019, |
|
"eval_rouge1": 24.3508, |
|
"eval_rouge2": 11.5594, |
|
"eval_rougeL": 20.075, |
|
"eval_rougeLsum": 22.932, |
|
"eval_runtime": 1027.8379, |
|
"eval_samples_per_second": 13.006, |
|
"eval_steps_per_second": 1.626, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.763165227082753e-05, |
|
"loss": 1.8869, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7585214080059445e-05, |
|
"loss": 1.8744, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.753877588929135e-05, |
|
"loss": 1.8609, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.749233769852327e-05, |
|
"loss": 1.896, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7445899507755184e-05, |
|
"loss": 1.8804, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.739946131698709e-05, |
|
"loss": 1.8525, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7353023126219004e-05, |
|
"loss": 1.8503, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.730658493545092e-05, |
|
"loss": 1.8688, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.726014674468283e-05, |
|
"loss": 1.8533, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.721370855391474e-05, |
|
"loss": 1.9009, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_gen_len": 18.999476361460204, |
|
"eval_loss": 1.7133935689926147, |
|
"eval_rouge1": 24.28, |
|
"eval_rouge2": 11.6075, |
|
"eval_rougeL": 20.0581, |
|
"eval_rougeLsum": 22.8833, |
|
"eval_runtime": 1031.4088, |
|
"eval_samples_per_second": 12.961, |
|
"eval_steps_per_second": 1.62, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7167270363146656e-05, |
|
"loss": 1.8704, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.712083217237856e-05, |
|
"loss": 1.8674, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.7074393981610476e-05, |
|
"loss": 1.8743, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.7027955790842396e-05, |
|
"loss": 1.8418, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.69815176000743e-05, |
|
"loss": 1.8522, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.6935079409306215e-05, |
|
"loss": 1.8921, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.688864121853813e-05, |
|
"loss": 1.8509, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.684220302777004e-05, |
|
"loss": 1.8358, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6795764837001954e-05, |
|
"loss": 1.8946, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.674932664623387e-05, |
|
"loss": 1.8975, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_gen_len": 18.999401555954517, |
|
"eval_loss": 1.7081294059753418, |
|
"eval_rouge1": 24.3203, |
|
"eval_rouge2": 11.6175, |
|
"eval_rougeL": 20.035, |
|
"eval_rougeLsum": 22.9167, |
|
"eval_runtime": 1038.9823, |
|
"eval_samples_per_second": 12.866, |
|
"eval_steps_per_second": 1.608, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6702888455465774e-05, |
|
"loss": 1.8714, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.665645026469769e-05, |
|
"loss": 1.8574, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.661001207392961e-05, |
|
"loss": 1.8721, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.656357388316151e-05, |
|
"loss": 1.8528, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6517135692393426e-05, |
|
"loss": 1.8688, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.647069750162534e-05, |
|
"loss": 1.8845, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6424259310857246e-05, |
|
"loss": 1.85, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6377821120089166e-05, |
|
"loss": 1.8522, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.633138292932108e-05, |
|
"loss": 1.8589, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6284944738552985e-05, |
|
"loss": 1.8835, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.7061101198196411, |
|
"eval_rouge1": 24.2729, |
|
"eval_rouge2": 11.6324, |
|
"eval_rougeL": 20.0728, |
|
"eval_rougeLsum": 22.8747, |
|
"eval_runtime": 1042.8871, |
|
"eval_samples_per_second": 12.818, |
|
"eval_steps_per_second": 1.602, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.62385065477849e-05, |
|
"loss": 1.8593, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.619206835701682e-05, |
|
"loss": 1.846, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6145630166248725e-05, |
|
"loss": 1.8914, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.609919197548064e-05, |
|
"loss": 1.8554, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.605275378471255e-05, |
|
"loss": 1.901, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.600631559394446e-05, |
|
"loss": 1.893, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.595987740317638e-05, |
|
"loss": 1.8421, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.591343921240829e-05, |
|
"loss": 1.8392, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.58670010216402e-05, |
|
"loss": 1.9094, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.582056283087211e-05, |
|
"loss": 1.8725, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.6995261907577515, |
|
"eval_rouge1": 24.2542, |
|
"eval_rouge2": 11.5763, |
|
"eval_rougeL": 20.0241, |
|
"eval_rougeLsum": 22.8713, |
|
"eval_runtime": 1028.0211, |
|
"eval_samples_per_second": 13.004, |
|
"eval_steps_per_second": 1.625, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.577412464010402e-05, |
|
"loss": 1.8004, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5727686449335936e-05, |
|
"loss": 1.8418, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.568124825856785e-05, |
|
"loss": 1.833, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.563481006779976e-05, |
|
"loss": 1.8684, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.558837187703167e-05, |
|
"loss": 1.8496, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.554193368626358e-05, |
|
"loss": 1.8498, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.54954954954955e-05, |
|
"loss": 1.869, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.544905730472741e-05, |
|
"loss": 1.863, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.540261911395932e-05, |
|
"loss": 1.8227, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5356180923191235e-05, |
|
"loss": 1.837, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_gen_len": 18.99970077797726, |
|
"eval_loss": 1.6998443603515625, |
|
"eval_rouge1": 24.3321, |
|
"eval_rouge2": 11.599, |
|
"eval_rougeL": 20.1028, |
|
"eval_rougeLsum": 22.9562, |
|
"eval_runtime": 1035.1096, |
|
"eval_samples_per_second": 12.915, |
|
"eval_steps_per_second": 1.614, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.530974273242315e-05, |
|
"loss": 1.8373, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.526330454165506e-05, |
|
"loss": 1.8523, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5216866350886974e-05, |
|
"loss": 1.8533, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.517042816011888e-05, |
|
"loss": 1.8649, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5123989969350793e-05, |
|
"loss": 1.845, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.507755177858271e-05, |
|
"loss": 1.8403, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.503111358781462e-05, |
|
"loss": 1.8796, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.498467539704653e-05, |
|
"loss": 1.8508, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4938237206278446e-05, |
|
"loss": 1.9008, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.489179901551035e-05, |
|
"loss": 1.8629, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.6944048404693604, |
|
"eval_rouge1": 24.4161, |
|
"eval_rouge2": 11.6208, |
|
"eval_rougeL": 20.1374, |
|
"eval_rougeLsum": 23.024, |
|
"eval_runtime": 1022.6375, |
|
"eval_samples_per_second": 13.072, |
|
"eval_steps_per_second": 1.634, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.484536082474227e-05, |
|
"loss": 1.8599, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4798922633974185e-05, |
|
"loss": 1.85, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.475248444320609e-05, |
|
"loss": 1.8512, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4706046252438005e-05, |
|
"loss": 1.8551, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4659608061669925e-05, |
|
"loss": 1.8278, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.461316987090183e-05, |
|
"loss": 1.8991, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4566731680133744e-05, |
|
"loss": 1.8555, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.452029348936566e-05, |
|
"loss": 1.8394, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4473855298597564e-05, |
|
"loss": 1.8479, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4427417107829484e-05, |
|
"loss": 1.85, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_gen_len": 18.999027528426094, |
|
"eval_loss": 1.70017409324646, |
|
"eval_rouge1": 24.3514, |
|
"eval_rouge2": 11.6883, |
|
"eval_rougeL": 20.134, |
|
"eval_rougeLsum": 22.9515, |
|
"eval_runtime": 1029.569, |
|
"eval_samples_per_second": 12.984, |
|
"eval_steps_per_second": 1.623, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.43809789170614e-05, |
|
"loss": 1.8509, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.43345407262933e-05, |
|
"loss": 1.846, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4288102535525216e-05, |
|
"loss": 1.8238, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.424166434475713e-05, |
|
"loss": 1.8519, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.419522615398904e-05, |
|
"loss": 1.8366, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4148787963220956e-05, |
|
"loss": 1.8822, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.410234977245287e-05, |
|
"loss": 1.864, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.4055911581684775e-05, |
|
"loss": 1.8388, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.4009473390916695e-05, |
|
"loss": 1.8891, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.396303520014861e-05, |
|
"loss": 1.8506, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_gen_len": 18.998653500897667, |
|
"eval_loss": 1.6894251108169556, |
|
"eval_rouge1": 24.3812, |
|
"eval_rouge2": 11.6592, |
|
"eval_rougeL": 20.1641, |
|
"eval_rougeLsum": 23.0108, |
|
"eval_runtime": 1023.0391, |
|
"eval_samples_per_second": 13.067, |
|
"eval_steps_per_second": 1.633, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3916597009380515e-05, |
|
"loss": 1.8513, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.387015881861243e-05, |
|
"loss": 1.9267, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.382372062784434e-05, |
|
"loss": 1.8639, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3777282437076254e-05, |
|
"loss": 1.8481, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.373084424630817e-05, |
|
"loss": 1.83, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.368440605554008e-05, |
|
"loss": 1.8801, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3637967864771987e-05, |
|
"loss": 1.8352, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.35915296740039e-05, |
|
"loss": 1.8361, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.354509148323582e-05, |
|
"loss": 1.8487, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3498653292467726e-05, |
|
"loss": 1.8869, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_gen_len": 18.999476361460204, |
|
"eval_loss": 1.6880699396133423, |
|
"eval_rouge1": 24.3956, |
|
"eval_rouge2": 11.6817, |
|
"eval_rougeL": 20.1654, |
|
"eval_rougeLsum": 23.0284, |
|
"eval_runtime": 1045.0589, |
|
"eval_samples_per_second": 12.792, |
|
"eval_steps_per_second": 1.599, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.345221510169964e-05, |
|
"loss": 1.8472, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.340577691093155e-05, |
|
"loss": 1.8419, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3359338720163465e-05, |
|
"loss": 1.8654, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.331290052939538e-05, |
|
"loss": 1.8605, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.326646233862729e-05, |
|
"loss": 1.8943, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.32200241478592e-05, |
|
"loss": 1.8725, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.317358595709111e-05, |
|
"loss": 1.8538, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.312714776632303e-05, |
|
"loss": 1.8818, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.308070957555494e-05, |
|
"loss": 1.836, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.303427138478685e-05, |
|
"loss": 1.8327, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_gen_len": 18.999251944943147, |
|
"eval_loss": 1.690325379371643, |
|
"eval_rouge1": 24.3707, |
|
"eval_rouge2": 11.6446, |
|
"eval_rougeL": 20.1353, |
|
"eval_rougeLsum": 22.9801, |
|
"eval_runtime": 1037.5831, |
|
"eval_samples_per_second": 12.884, |
|
"eval_steps_per_second": 1.61, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.2987833194018764e-05, |
|
"loss": 1.8389, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.294139500325067e-05, |
|
"loss": 1.8339, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.289495681248259e-05, |
|
"loss": 1.8757, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.28485186217145e-05, |
|
"loss": 1.8118, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.280208043094641e-05, |
|
"loss": 1.8513, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.275564224017832e-05, |
|
"loss": 1.8791, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.270920404941024e-05, |
|
"loss": 1.8822, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.266276585864215e-05, |
|
"loss": 1.8793, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.261632766787406e-05, |
|
"loss": 1.8132, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2569889477105975e-05, |
|
"loss": 1.8204, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_gen_len": 18.999326750448834, |
|
"eval_loss": 1.6896474361419678, |
|
"eval_rouge1": 24.3663, |
|
"eval_rouge2": 11.6963, |
|
"eval_rougeL": 20.1357, |
|
"eval_rougeLsum": 22.9898, |
|
"eval_runtime": 1023.2223, |
|
"eval_samples_per_second": 13.065, |
|
"eval_steps_per_second": 1.633, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.252345128633788e-05, |
|
"loss": 1.8534, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.24770130955698e-05, |
|
"loss": 1.7864, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2430574904801715e-05, |
|
"loss": 1.8649, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.238413671403362e-05, |
|
"loss": 1.8562, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2337698523265534e-05, |
|
"loss": 1.8897, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.229126033249745e-05, |
|
"loss": 1.8949, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.224482214172936e-05, |
|
"loss": 1.8548, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2198383950961273e-05, |
|
"loss": 1.8514, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2151945760193187e-05, |
|
"loss": 1.8525, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.210550756942509e-05, |
|
"loss": 1.8764, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_gen_len": 18.997830640335128, |
|
"eval_loss": 1.6845980882644653, |
|
"eval_rouge1": 24.4212, |
|
"eval_rouge2": 11.652, |
|
"eval_rougeL": 20.1455, |
|
"eval_rougeLsum": 23.0326, |
|
"eval_runtime": 1022.189, |
|
"eval_samples_per_second": 13.078, |
|
"eval_steps_per_second": 1.635, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.205906937865701e-05, |
|
"loss": 1.8621, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.2012631187888926e-05, |
|
"loss": 1.8591, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.196619299712083e-05, |
|
"loss": 1.8448, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.1919754806352745e-05, |
|
"loss": 1.8966, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.187331661558466e-05, |
|
"loss": 1.8307, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.182687842481657e-05, |
|
"loss": 1.8801, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1780440234048485e-05, |
|
"loss": 1.8006, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.17340020432804e-05, |
|
"loss": 1.8394, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1687563852512304e-05, |
|
"loss": 1.8494, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.164112566174422e-05, |
|
"loss": 1.8213, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_gen_len": 18.999177139437464, |
|
"eval_loss": 1.6816905736923218, |
|
"eval_rouge1": 24.452, |
|
"eval_rouge2": 11.7014, |
|
"eval_rougeL": 20.1898, |
|
"eval_rougeLsum": 23.0668, |
|
"eval_runtime": 1033.0047, |
|
"eval_samples_per_second": 12.941, |
|
"eval_steps_per_second": 1.618, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.159468747097614e-05, |
|
"loss": 1.8428, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1548249280208044e-05, |
|
"loss": 1.8512, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.150181108943996e-05, |
|
"loss": 1.8383, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.145537289867187e-05, |
|
"loss": 1.8308, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.140893470790378e-05, |
|
"loss": 1.8504, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1362496517135696e-05, |
|
"loss": 1.84, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.131605832636761e-05, |
|
"loss": 1.8717, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1269620135599516e-05, |
|
"loss": 1.8768, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.122318194483143e-05, |
|
"loss": 1.8493, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.117674375406335e-05, |
|
"loss": 1.8424, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_gen_len": 18.998952722920407, |
|
"eval_loss": 1.6843751668930054, |
|
"eval_rouge1": 24.4206, |
|
"eval_rouge2": 11.7049, |
|
"eval_rougeL": 20.1931, |
|
"eval_rougeLsum": 23.0358, |
|
"eval_runtime": 1022.1365, |
|
"eval_samples_per_second": 13.078, |
|
"eval_steps_per_second": 1.635, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1130305563295255e-05, |
|
"loss": 1.8629, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.108386737252717e-05, |
|
"loss": 1.8285, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.103742918175908e-05, |
|
"loss": 1.8237, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.099099099099099e-05, |
|
"loss": 1.833, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.094455280022291e-05, |
|
"loss": 1.8349, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.089811460945482e-05, |
|
"loss": 1.8581, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.085167641868673e-05, |
|
"loss": 1.8623, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.080523822791864e-05, |
|
"loss": 1.8653, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0758800037150553e-05, |
|
"loss": 1.8228, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.071236184638247e-05, |
|
"loss": 1.8721, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6814035177230835, |
|
"eval_rouge1": 24.4483, |
|
"eval_rouge2": 11.6789, |
|
"eval_rougeL": 20.1798, |
|
"eval_rougeLsum": 23.0508, |
|
"eval_runtime": 1031.5868, |
|
"eval_samples_per_second": 12.959, |
|
"eval_steps_per_second": 1.62, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.066592365561438e-05, |
|
"loss": 1.8544, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.061948546484629e-05, |
|
"loss": 1.8699, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.05730472740782e-05, |
|
"loss": 1.8262, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.052660908331012e-05, |
|
"loss": 1.8274, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.048017089254203e-05, |
|
"loss": 1.8406, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.043373270177394e-05, |
|
"loss": 1.8882, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.038729451100585e-05, |
|
"loss": 1.8563, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0340856320237765e-05, |
|
"loss": 1.861, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.029441812946968e-05, |
|
"loss": 1.8437, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.024797993870159e-05, |
|
"loss": 1.87, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6796290874481201, |
|
"eval_rouge1": 24.4799, |
|
"eval_rouge2": 11.6789, |
|
"eval_rougeL": 20.1919, |
|
"eval_rougeLsum": 23.0831, |
|
"eval_runtime": 1027.2545, |
|
"eval_samples_per_second": 13.013, |
|
"eval_steps_per_second": 1.627, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0201541747933504e-05, |
|
"loss": 1.8364, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.015510355716541e-05, |
|
"loss": 1.8659, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0108665366397324e-05, |
|
"loss": 1.8246, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.0062227175629244e-05, |
|
"loss": 1.8523, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.001578898486115e-05, |
|
"loss": 1.8709, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.996935079409306e-05, |
|
"loss": 1.838, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.9922912603324976e-05, |
|
"loss": 1.8308, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.987647441255689e-05, |
|
"loss": 1.8366, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.98300362217888e-05, |
|
"loss": 1.8366, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.9783598031020716e-05, |
|
"loss": 1.844, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6769678592681885, |
|
"eval_rouge1": 24.4741, |
|
"eval_rouge2": 11.7433, |
|
"eval_rougeL": 20.2031, |
|
"eval_rougeLsum": 23.0535, |
|
"eval_runtime": 1029.6436, |
|
"eval_samples_per_second": 12.983, |
|
"eval_steps_per_second": 1.623, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.973715984025262e-05, |
|
"loss": 1.8852, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9690721649484535e-05, |
|
"loss": 1.8251, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9644283458716455e-05, |
|
"loss": 1.8515, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.959784526794836e-05, |
|
"loss": 1.8349, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.9551407077180275e-05, |
|
"loss": 1.813, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.950496888641219e-05, |
|
"loss": 1.8574, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.9458530695644094e-05, |
|
"loss": 1.8541, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.9412092504876014e-05, |
|
"loss": 1.8456, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.936565431410793e-05, |
|
"loss": 1.8051, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.9319216123339834e-05, |
|
"loss": 1.8611, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_gen_len": 18.99857869539198, |
|
"eval_loss": 1.6784615516662598, |
|
"eval_rouge1": 24.4837, |
|
"eval_rouge2": 11.7572, |
|
"eval_rougeL": 20.219, |
|
"eval_rougeLsum": 23.088, |
|
"eval_runtime": 1026.7838, |
|
"eval_samples_per_second": 13.019, |
|
"eval_steps_per_second": 1.627, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.927277793257175e-05, |
|
"loss": 1.8259, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.9226339741803667e-05, |
|
"loss": 1.8995, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.917990155103557e-05, |
|
"loss": 1.8324, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.9133463360267486e-05, |
|
"loss": 1.8685, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.90870251694994e-05, |
|
"loss": 1.8533, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.9040586978731306e-05, |
|
"loss": 1.8458, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.8994148787963225e-05, |
|
"loss": 1.8748, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.894771059719514e-05, |
|
"loss": 1.8468, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8901272406427045e-05, |
|
"loss": 1.8464, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.885483421565896e-05, |
|
"loss": 1.8201, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_gen_len": 18.999326750448834, |
|
"eval_loss": 1.679620385169983, |
|
"eval_rouge1": 24.3955, |
|
"eval_rouge2": 11.6978, |
|
"eval_rougeL": 20.173, |
|
"eval_rougeLsum": 23.0302, |
|
"eval_runtime": 1032.6621, |
|
"eval_samples_per_second": 12.945, |
|
"eval_steps_per_second": 1.618, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.880839602489087e-05, |
|
"loss": 1.8332, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8761957834122784e-05, |
|
"loss": 1.8076, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.87155196433547e-05, |
|
"loss": 1.8539, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.866908145258661e-05, |
|
"loss": 1.832, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.862264326181852e-05, |
|
"loss": 1.8395, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.857620507105044e-05, |
|
"loss": 1.8648, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.852976688028235e-05, |
|
"loss": 1.8629, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.8483328689514256e-05, |
|
"loss": 1.8374, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.843689049874617e-05, |
|
"loss": 1.8363, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.839045230797808e-05, |
|
"loss": 1.8506, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_gen_len": 18.999476361460204, |
|
"eval_loss": 1.6769764423370361, |
|
"eval_rouge1": 24.4084, |
|
"eval_rouge2": 11.711, |
|
"eval_rougeL": 20.1851, |
|
"eval_rougeLsum": 23.0266, |
|
"eval_runtime": 1026.6424, |
|
"eval_samples_per_second": 13.021, |
|
"eval_steps_per_second": 1.628, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.8344014117209996e-05, |
|
"loss": 1.8393, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.829757592644191e-05, |
|
"loss": 1.8468, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.825113773567382e-05, |
|
"loss": 1.8548, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.820469954490573e-05, |
|
"loss": 1.8017, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.815826135413764e-05, |
|
"loss": 1.8355, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.811182316336956e-05, |
|
"loss": 1.841, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.806538497260147e-05, |
|
"loss": 1.8751, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.801894678183338e-05, |
|
"loss": 1.8258, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.7972508591065294e-05, |
|
"loss": 1.8243, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.792607040029721e-05, |
|
"loss": 1.846, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_gen_len": 18.998952722920407, |
|
"eval_loss": 1.6765128374099731, |
|
"eval_rouge1": 24.4272, |
|
"eval_rouge2": 11.6779, |
|
"eval_rougeL": 20.1785, |
|
"eval_rougeLsum": 23.0352, |
|
"eval_runtime": 1042.1434, |
|
"eval_samples_per_second": 12.827, |
|
"eval_steps_per_second": 1.603, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.787963220952912e-05, |
|
"loss": 1.8538, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.7833194018761034e-05, |
|
"loss": 1.8498, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.778675582799294e-05, |
|
"loss": 1.8562, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.774031763722485e-05, |
|
"loss": 1.8432, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.769387944645677e-05, |
|
"loss": 1.871, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.764744125568868e-05, |
|
"loss": 1.8444, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.760100306492059e-05, |
|
"loss": 1.8634, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.7554564874152506e-05, |
|
"loss": 1.8377, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.750812668338441e-05, |
|
"loss": 1.862, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.746168849261633e-05, |
|
"loss": 1.8431, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_gen_len": 18.999775583482943, |
|
"eval_loss": 1.6757032871246338, |
|
"eval_rouge1": 24.4484, |
|
"eval_rouge2": 11.7154, |
|
"eval_rougeL": 20.2156, |
|
"eval_rougeLsum": 23.0646, |
|
"eval_runtime": 1031.148, |
|
"eval_samples_per_second": 12.964, |
|
"eval_steps_per_second": 1.621, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.7415250301848245e-05, |
|
"loss": 1.8086, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.736881211108015e-05, |
|
"loss": 1.8057, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.7322373920312064e-05, |
|
"loss": 1.8122, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.7275935729543984e-05, |
|
"loss": 1.8436, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.722949753877589e-05, |
|
"loss": 1.8109, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7183059348007804e-05, |
|
"loss": 1.8281, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.713662115723972e-05, |
|
"loss": 1.8217, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.709018296647162e-05, |
|
"loss": 1.8251, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.704374477570354e-05, |
|
"loss": 1.814, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.6997306584935456e-05, |
|
"loss": 1.8208, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_gen_len": 18.999326750448834, |
|
"eval_loss": 1.6763582229614258, |
|
"eval_rouge1": 24.412, |
|
"eval_rouge2": 11.6887, |
|
"eval_rougeL": 20.1752, |
|
"eval_rougeLsum": 23.0151, |
|
"eval_runtime": 1029.8014, |
|
"eval_samples_per_second": 12.981, |
|
"eval_steps_per_second": 1.623, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.695086839416736e-05, |
|
"loss": 1.8216, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6904430203399276e-05, |
|
"loss": 1.8218, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.685799201263119e-05, |
|
"loss": 1.8148, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.68115538218631e-05, |
|
"loss": 1.8116, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6765115631095015e-05, |
|
"loss": 1.8221, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.671867744032693e-05, |
|
"loss": 1.8084, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6672239249558835e-05, |
|
"loss": 1.8018, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.662580105879075e-05, |
|
"loss": 1.8693, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.657936286802267e-05, |
|
"loss": 1.8269, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6532924677254574e-05, |
|
"loss": 1.8108, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_gen_len": 18.99970077797726, |
|
"eval_loss": 1.673341155052185, |
|
"eval_rouge1": 24.4051, |
|
"eval_rouge2": 11.7155, |
|
"eval_rougeL": 20.1773, |
|
"eval_rougeLsum": 23.0215, |
|
"eval_runtime": 1021.7828, |
|
"eval_samples_per_second": 13.083, |
|
"eval_steps_per_second": 1.635, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.648648648648649e-05, |
|
"loss": 1.8227, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.64400482957184e-05, |
|
"loss": 1.7996, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6393610104950314e-05, |
|
"loss": 1.8666, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.634717191418223e-05, |
|
"loss": 1.8661, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.630073372341414e-05, |
|
"loss": 1.8567, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6254295532646046e-05, |
|
"loss": 1.8299, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.620785734187796e-05, |
|
"loss": 1.8452, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.616141915110988e-05, |
|
"loss": 1.8044, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6114980960341786e-05, |
|
"loss": 1.7938, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.60685427695737e-05, |
|
"loss": 1.847, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_gen_len": 18.999401555954517, |
|
"eval_loss": 1.6737741231918335, |
|
"eval_rouge1": 24.5531, |
|
"eval_rouge2": 11.7949, |
|
"eval_rougeL": 20.2834, |
|
"eval_rougeLsum": 23.1588, |
|
"eval_runtime": 1026.8053, |
|
"eval_samples_per_second": 13.019, |
|
"eval_steps_per_second": 1.627, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.602210457880561e-05, |
|
"loss": 1.8155, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.597566638803752e-05, |
|
"loss": 1.8121, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.592922819726944e-05, |
|
"loss": 1.8644, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.588279000650135e-05, |
|
"loss": 1.8362, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.583635181573326e-05, |
|
"loss": 1.8361, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.578991362496517e-05, |
|
"loss": 1.8711, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.574347543419709e-05, |
|
"loss": 1.8515, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5697037243429e-05, |
|
"loss": 1.8456, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.565059905266091e-05, |
|
"loss": 1.8257, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.560416086189282e-05, |
|
"loss": 1.8386, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_gen_len": 18.999102333931777, |
|
"eval_loss": 1.667382836341858, |
|
"eval_rouge1": 24.5155, |
|
"eval_rouge2": 11.7333, |
|
"eval_rougeL": 20.2529, |
|
"eval_rougeLsum": 23.145, |
|
"eval_runtime": 1035.863, |
|
"eval_samples_per_second": 12.905, |
|
"eval_steps_per_second": 1.613, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.555772267112473e-05, |
|
"loss": 1.8316, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.551128448035665e-05, |
|
"loss": 1.8511, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.546484628958856e-05, |
|
"loss": 1.8526, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.541840809882047e-05, |
|
"loss": 1.8468, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.537196990805238e-05, |
|
"loss": 1.8796, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5325531717284295e-05, |
|
"loss": 1.8526, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.527909352651621e-05, |
|
"loss": 1.8418, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.523265533574812e-05, |
|
"loss": 1.8185, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.5186217144980035e-05, |
|
"loss": 1.8886, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.513977895421194e-05, |
|
"loss": 1.82, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_gen_len": 18.998803111909037, |
|
"eval_loss": 1.66925048828125, |
|
"eval_rouge1": 24.4498, |
|
"eval_rouge2": 11.7118, |
|
"eval_rougeL": 20.2183, |
|
"eval_rougeLsum": 23.0767, |
|
"eval_runtime": 1043.8464, |
|
"eval_samples_per_second": 12.806, |
|
"eval_steps_per_second": 1.601, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.509334076344386e-05, |
|
"loss": 1.8643, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.5046902572675774e-05, |
|
"loss": 1.7943, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.500046438190768e-05, |
|
"loss": 1.8089, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.4954026191139594e-05, |
|
"loss": 1.7881, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.490758800037151e-05, |
|
"loss": 1.8543, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.486114980960342e-05, |
|
"loss": 1.8068, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.481471161883533e-05, |
|
"loss": 1.8266, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.4768273428067246e-05, |
|
"loss": 1.8328, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.472183523729915e-05, |
|
"loss": 1.8644, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.4675397046531066e-05, |
|
"loss": 1.8475, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_gen_len": 18.999251944943147, |
|
"eval_loss": 1.6676273345947266, |
|
"eval_rouge1": 24.442, |
|
"eval_rouge2": 11.676, |
|
"eval_rougeL": 20.168, |
|
"eval_rougeLsum": 23.0409, |
|
"eval_runtime": 1028.9174, |
|
"eval_samples_per_second": 12.992, |
|
"eval_steps_per_second": 1.624, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.4628958855762986e-05, |
|
"loss": 1.8232, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.458252066499489e-05, |
|
"loss": 1.8512, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.4536082474226805e-05, |
|
"loss": 1.8338, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.448964428345872e-05, |
|
"loss": 1.8103, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.444320609269063e-05, |
|
"loss": 1.8454, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4396767901922544e-05, |
|
"loss": 1.8433, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.435032971115446e-05, |
|
"loss": 1.868, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4303891520386364e-05, |
|
"loss": 1.794, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.425745332961828e-05, |
|
"loss": 1.7941, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.42110151388502e-05, |
|
"loss": 1.7948, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_gen_len": 18.999027528426094, |
|
"eval_loss": 1.6689342260360718, |
|
"eval_rouge1": 24.4561, |
|
"eval_rouge2": 11.7865, |
|
"eval_rougeL": 20.2446, |
|
"eval_rougeLsum": 23.0707, |
|
"eval_runtime": 1028.9964, |
|
"eval_samples_per_second": 12.991, |
|
"eval_steps_per_second": 1.624, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.41645769480821e-05, |
|
"loss": 1.8232, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.4118138757314017e-05, |
|
"loss": 1.8209, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.407170056654593e-05, |
|
"loss": 1.8718, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.4025262375777836e-05, |
|
"loss": 1.8139, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.3978824185009756e-05, |
|
"loss": 1.8332, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.393238599424167e-05, |
|
"loss": 1.8244, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.3885947803473575e-05, |
|
"loss": 1.7957, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.383950961270549e-05, |
|
"loss": 1.8466, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.379307142193741e-05, |
|
"loss": 1.8225, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.3746633231169315e-05, |
|
"loss": 1.8357, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_gen_len": 18.999401555954517, |
|
"eval_loss": 1.6757416725158691, |
|
"eval_rouge1": 24.4005, |
|
"eval_rouge2": 11.7299, |
|
"eval_rougeL": 20.1999, |
|
"eval_rougeLsum": 23.0093, |
|
"eval_runtime": 1030.4474, |
|
"eval_samples_per_second": 12.973, |
|
"eval_steps_per_second": 1.622, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.370019504040123e-05, |
|
"loss": 1.8064, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.365375684963314e-05, |
|
"loss": 1.8333, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.360731865886505e-05, |
|
"loss": 1.8368, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.356088046809697e-05, |
|
"loss": 1.8732, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.351444227732888e-05, |
|
"loss": 1.8425, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.346800408656079e-05, |
|
"loss": 1.7983, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.34215658957927e-05, |
|
"loss": 1.8615, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.337512770502461e-05, |
|
"loss": 1.8414, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.3328689514256526e-05, |
|
"loss": 1.8695, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.328225132348844e-05, |
|
"loss": 1.8624, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 18.998803111909037, |
|
"eval_loss": 1.674521803855896, |
|
"eval_rouge1": 24.3371, |
|
"eval_rouge2": 11.6749, |
|
"eval_rougeL": 20.1257, |
|
"eval_rougeLsum": 22.9428, |
|
"eval_runtime": 1027.0502, |
|
"eval_samples_per_second": 13.016, |
|
"eval_steps_per_second": 1.627, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.323581313272035e-05, |
|
"loss": 1.8208, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.318937494195226e-05, |
|
"loss": 1.8022, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.314293675118418e-05, |
|
"loss": 1.8168, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.309649856041609e-05, |
|
"loss": 1.8046, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.3050060369648e-05, |
|
"loss": 1.8199, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.300362217887991e-05, |
|
"loss": 1.8291, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.2957183988111825e-05, |
|
"loss": 1.7895, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.291074579734374e-05, |
|
"loss": 1.8152, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.286430760657565e-05, |
|
"loss": 1.8328, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.2817869415807564e-05, |
|
"loss": 1.8309, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_gen_len": 18.999476361460204, |
|
"eval_loss": 1.6674766540527344, |
|
"eval_rouge1": 24.5108, |
|
"eval_rouge2": 11.8038, |
|
"eval_rougeL": 20.2691, |
|
"eval_rougeLsum": 23.117, |
|
"eval_runtime": 1029.1013, |
|
"eval_samples_per_second": 12.99, |
|
"eval_steps_per_second": 1.624, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.277143122503947e-05, |
|
"loss": 1.7957, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.2724993034271383e-05, |
|
"loss": 1.8033, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.26785548435033e-05, |
|
"loss": 1.8517, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.263211665273521e-05, |
|
"loss": 1.804, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.258567846196712e-05, |
|
"loss": 1.7834, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.2539240271199036e-05, |
|
"loss": 1.819, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.249280208043094e-05, |
|
"loss": 1.8737, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.244636388966286e-05, |
|
"loss": 1.7807, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.2399925698894775e-05, |
|
"loss": 1.8099, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.235348750812668e-05, |
|
"loss": 1.8237, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.6654016971588135, |
|
"eval_rouge1": 24.482, |
|
"eval_rouge2": 11.7485, |
|
"eval_rougeL": 20.2225, |
|
"eval_rougeLsum": 23.0917, |
|
"eval_runtime": 1033.6425, |
|
"eval_samples_per_second": 12.933, |
|
"eval_steps_per_second": 1.617, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.2307049317358595e-05, |
|
"loss": 1.7796, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.2260611126590515e-05, |
|
"loss": 1.8055, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.221417293582242e-05, |
|
"loss": 1.8558, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.2167734745054334e-05, |
|
"loss": 1.8292, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.212129655428625e-05, |
|
"loss": 1.8351, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.2074858363518154e-05, |
|
"loss": 1.8071, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.2028420172750074e-05, |
|
"loss": 1.7892, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.198198198198199e-05, |
|
"loss": 1.864, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.193554379121389e-05, |
|
"loss": 1.8289, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.1889105600445806e-05, |
|
"loss": 1.7743, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_gen_len": 18.999251944943147, |
|
"eval_loss": 1.6681499481201172, |
|
"eval_rouge1": 24.5106, |
|
"eval_rouge2": 11.7511, |
|
"eval_rougeL": 20.2583, |
|
"eval_rougeLsum": 23.123, |
|
"eval_runtime": 1040.1281, |
|
"eval_samples_per_second": 12.852, |
|
"eval_steps_per_second": 1.607, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.184266740967772e-05, |
|
"loss": 1.77, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.179622921890963e-05, |
|
"loss": 1.7844, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.1749791028141546e-05, |
|
"loss": 1.791, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.170335283737346e-05, |
|
"loss": 1.8038, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.1656914646605365e-05, |
|
"loss": 1.8211, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.1610476455837285e-05, |
|
"loss": 1.8608, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.15640382650692e-05, |
|
"loss": 1.8124, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.1517600074301105e-05, |
|
"loss": 1.8327, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.147116188353302e-05, |
|
"loss": 1.7969, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.142472369276493e-05, |
|
"loss": 1.7811, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_gen_len": 18.999102333931777, |
|
"eval_loss": 1.6636024713516235, |
|
"eval_rouge1": 24.6194, |
|
"eval_rouge2": 11.843, |
|
"eval_rougeL": 20.3375, |
|
"eval_rougeLsum": 23.2259, |
|
"eval_runtime": 1032.055, |
|
"eval_samples_per_second": 12.953, |
|
"eval_steps_per_second": 1.619, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.1378285501996844e-05, |
|
"loss": 1.8392, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.133184731122876e-05, |
|
"loss": 1.7993, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.128540912046067e-05, |
|
"loss": 1.7919, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.123897092969258e-05, |
|
"loss": 1.7836, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.119253273892449e-05, |
|
"loss": 1.815, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.114609454815641e-05, |
|
"loss": 1.8078, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.1099656357388316e-05, |
|
"loss": 1.8451, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.105321816662023e-05, |
|
"loss": 1.8362, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.100677997585214e-05, |
|
"loss": 1.7685, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.0960341785084055e-05, |
|
"loss": 1.7973, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.666628360748291, |
|
"eval_rouge1": 24.5434, |
|
"eval_rouge2": 11.8133, |
|
"eval_rougeL": 20.3033, |
|
"eval_rougeLsum": 23.165, |
|
"eval_runtime": 1027.5741, |
|
"eval_samples_per_second": 13.009, |
|
"eval_steps_per_second": 1.626, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.091390359431597e-05, |
|
"loss": 1.8079, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.086746540354788e-05, |
|
"loss": 1.8149, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.082102721277979e-05, |
|
"loss": 1.8236, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.07745890220117e-05, |
|
"loss": 1.8512, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.072815083124362e-05, |
|
"loss": 1.7871, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.068171264047553e-05, |
|
"loss": 1.763, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.063527444970744e-05, |
|
"loss": 1.7681, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.0588836258939354e-05, |
|
"loss": 1.7997, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.054239806817126e-05, |
|
"loss": 1.7942, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.049595987740318e-05, |
|
"loss": 1.8156, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_gen_len": 18.999326750448834, |
|
"eval_loss": 1.6660268306732178, |
|
"eval_rouge1": 24.4857, |
|
"eval_rouge2": 11.7526, |
|
"eval_rougeL": 20.2406, |
|
"eval_rougeLsum": 23.1081, |
|
"eval_runtime": 1032.3479, |
|
"eval_samples_per_second": 12.949, |
|
"eval_steps_per_second": 1.619, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.044952168663509e-05, |
|
"loss": 1.8213, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.0403083495867003e-05, |
|
"loss": 1.8291, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.0356645305098913e-05, |
|
"loss": 1.8549, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.031020711433083e-05, |
|
"loss": 1.8268, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.0263768923562742e-05, |
|
"loss": 1.819, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0217330732794652e-05, |
|
"loss": 1.8526, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0170892542026562e-05, |
|
"loss": 1.8245, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0124454351258475e-05, |
|
"loss": 1.786, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.007801616049039e-05, |
|
"loss": 1.8223, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.00315779697223e-05, |
|
"loss": 1.8403, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_gen_len": 18.999775583482943, |
|
"eval_loss": 1.6620622873306274, |
|
"eval_rouge1": 24.4632, |
|
"eval_rouge2": 11.7525, |
|
"eval_rougeL": 20.2459, |
|
"eval_rougeLsum": 23.0692, |
|
"eval_runtime": 1022.9465, |
|
"eval_samples_per_second": 13.068, |
|
"eval_steps_per_second": 1.634, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.9985139778954214e-05, |
|
"loss": 1.7771, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.9938701588186124e-05, |
|
"loss": 1.7898, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9892263397418034e-05, |
|
"loss": 1.8423, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9845825206649954e-05, |
|
"loss": 1.8072, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9799387015881863e-05, |
|
"loss": 1.7805, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9752948825113773e-05, |
|
"loss": 1.8016, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9706510634345686e-05, |
|
"loss": 1.8198, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9660072443577603e-05, |
|
"loss": 1.845, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9613634252809513e-05, |
|
"loss": 1.8332, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9567196062041426e-05, |
|
"loss": 1.8129, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_gen_len": 18.99985038898863, |
|
"eval_loss": 1.66425359249115, |
|
"eval_rouge1": 24.6032, |
|
"eval_rouge2": 11.8251, |
|
"eval_rougeL": 20.3368, |
|
"eval_rougeLsum": 23.1806, |
|
"eval_runtime": 1020.3885, |
|
"eval_samples_per_second": 13.101, |
|
"eval_steps_per_second": 1.638, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9520757871273335e-05, |
|
"loss": 1.8524, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9474319680505245e-05, |
|
"loss": 1.8298, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9427881489737165e-05, |
|
"loss": 1.8446, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9381443298969075e-05, |
|
"loss": 1.8012, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9335005108200985e-05, |
|
"loss": 1.8111, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9288566917432898e-05, |
|
"loss": 1.7765, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.9242128726664808e-05, |
|
"loss": 1.8229, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.9195690535896724e-05, |
|
"loss": 1.8203, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.9149252345128637e-05, |
|
"loss": 1.7998, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.9102814154360547e-05, |
|
"loss": 1.7896, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_gen_len": 18.999251944943147, |
|
"eval_loss": 1.6622380018234253, |
|
"eval_rouge1": 24.4619, |
|
"eval_rouge2": 11.7769, |
|
"eval_rougeL": 20.2516, |
|
"eval_rougeLsum": 23.0647, |
|
"eval_runtime": 1031.3113, |
|
"eval_samples_per_second": 12.962, |
|
"eval_steps_per_second": 1.62, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.9056375963592457e-05, |
|
"loss": 1.8123, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.9009937772824373e-05, |
|
"loss": 1.8106, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.8963499582056286e-05, |
|
"loss": 1.8164, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.8917061391288196e-05, |
|
"loss": 1.7791, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.887062320052011e-05, |
|
"loss": 1.7977, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.882418500975202e-05, |
|
"loss": 1.749, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.8777746818983935e-05, |
|
"loss": 1.827, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.873130862821585e-05, |
|
"loss": 1.8235, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.868487043744776e-05, |
|
"loss": 1.7922, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.8638432246679668e-05, |
|
"loss": 1.7948, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_gen_len": 18.999177139437464, |
|
"eval_loss": 1.6607564687728882, |
|
"eval_rouge1": 24.5468, |
|
"eval_rouge2": 11.8041, |
|
"eval_rougeL": 20.2941, |
|
"eval_rougeLsum": 23.1551, |
|
"eval_runtime": 1037.8425, |
|
"eval_samples_per_second": 12.881, |
|
"eval_steps_per_second": 1.61, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.859199405591158e-05, |
|
"loss": 1.7752, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.8545555865143498e-05, |
|
"loss": 1.8504, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.8499117674375408e-05, |
|
"loss": 1.8319, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.845267948360732e-05, |
|
"loss": 1.7981, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.840624129283923e-05, |
|
"loss": 1.8179, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.8359803102071147e-05, |
|
"loss": 1.7838, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.831336491130306e-05, |
|
"loss": 1.799, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.826692672053497e-05, |
|
"loss": 1.812, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.822048852976688e-05, |
|
"loss": 1.8216, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.8174050338998793e-05, |
|
"loss": 1.8043, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_gen_len": 18.999326750448834, |
|
"eval_loss": 1.6613510847091675, |
|
"eval_rouge1": 24.5774, |
|
"eval_rouge2": 11.8246, |
|
"eval_rougeL": 20.3189, |
|
"eval_rougeLsum": 23.1836, |
|
"eval_runtime": 1024.3233, |
|
"eval_samples_per_second": 13.051, |
|
"eval_steps_per_second": 1.631, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.812761214823071e-05, |
|
"loss": 1.8114, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.808117395746262e-05, |
|
"loss": 1.7885, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.8034735766694532e-05, |
|
"loss": 1.7741, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.7988297575926442e-05, |
|
"loss": 1.8216, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.794185938515835e-05, |
|
"loss": 1.8408, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.789542119439027e-05, |
|
"loss": 1.7825, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.784898300362218e-05, |
|
"loss": 1.8244, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.780254481285409e-05, |
|
"loss": 1.8074, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.7756106622086004e-05, |
|
"loss": 1.8102, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.7709668431317914e-05, |
|
"loss": 1.7884, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_gen_len": 18.999326750448834, |
|
"eval_loss": 1.658116340637207, |
|
"eval_rouge1": 24.5688, |
|
"eval_rouge2": 11.843, |
|
"eval_rougeL": 20.2993, |
|
"eval_rougeLsum": 23.1756, |
|
"eval_runtime": 1026.94, |
|
"eval_samples_per_second": 13.017, |
|
"eval_steps_per_second": 1.627, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.766323024054983e-05, |
|
"loss": 1.8173, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.7616792049781744e-05, |
|
"loss": 1.7631, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.7570353859013653e-05, |
|
"loss": 1.7902, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.7523915668245563e-05, |
|
"loss": 1.8197, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.7477477477477483e-05, |
|
"loss": 1.8447, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.7431039286709393e-05, |
|
"loss": 1.7617, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7384601095941302e-05, |
|
"loss": 1.8125, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7338162905173216e-05, |
|
"loss": 1.8059, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7291724714405125e-05, |
|
"loss": 1.8517, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.7245286523637042e-05, |
|
"loss": 1.8041, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.6614130735397339, |
|
"eval_rouge1": 24.5454, |
|
"eval_rouge2": 11.8346, |
|
"eval_rougeL": 20.3179, |
|
"eval_rougeLsum": 23.1605, |
|
"eval_runtime": 1026.6117, |
|
"eval_samples_per_second": 13.021, |
|
"eval_steps_per_second": 1.628, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.7198848332868955e-05, |
|
"loss": 1.7901, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.7152410142100865e-05, |
|
"loss": 1.8027, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.7105971951332774e-05, |
|
"loss": 1.7979, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.7059533760564688e-05, |
|
"loss": 1.7853, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.7013095569796604e-05, |
|
"loss": 1.7663, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.6966657379028514e-05, |
|
"loss": 1.7904, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.6920219188260427e-05, |
|
"loss": 1.8164, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.6873780997492337e-05, |
|
"loss": 1.8056, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.6827342806724253e-05, |
|
"loss": 1.7842, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.6780904615956166e-05, |
|
"loss": 1.8192, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_gen_len": 18.999775583482943, |
|
"eval_loss": 1.6596847772598267, |
|
"eval_rouge1": 24.5017, |
|
"eval_rouge2": 11.7755, |
|
"eval_rougeL": 20.2439, |
|
"eval_rougeLsum": 23.1148, |
|
"eval_runtime": 1036.17, |
|
"eval_samples_per_second": 12.901, |
|
"eval_steps_per_second": 1.613, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.6734466425188076e-05, |
|
"loss": 1.7418, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.6688028234419986e-05, |
|
"loss": 1.7761, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.66415900436519e-05, |
|
"loss": 1.7924, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.6595151852883816e-05, |
|
"loss": 1.8212, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.6548713662115725e-05, |
|
"loss": 1.7813, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.650227547134764e-05, |
|
"loss": 1.8258, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.6455837280579548e-05, |
|
"loss": 1.8491, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.640939908981146e-05, |
|
"loss": 1.8285, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.6362960899043378e-05, |
|
"loss": 1.8399, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.6316522708275288e-05, |
|
"loss": 1.8679, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_gen_len": 18.999476361460204, |
|
"eval_loss": 1.6554793119430542, |
|
"eval_rouge1": 24.5302, |
|
"eval_rouge2": 11.7638, |
|
"eval_rougeL": 20.2592, |
|
"eval_rougeLsum": 23.1395, |
|
"eval_runtime": 1033.7739, |
|
"eval_samples_per_second": 12.931, |
|
"eval_steps_per_second": 1.616, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.6270084517507197e-05, |
|
"loss": 1.7982, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.622364632673911e-05, |
|
"loss": 1.8176, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6177208135971027e-05, |
|
"loss": 1.8155, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6130769945202937e-05, |
|
"loss": 1.8084, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.608433175443485e-05, |
|
"loss": 1.824, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.603789356366676e-05, |
|
"loss": 1.7652, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.599145537289867e-05, |
|
"loss": 1.7971, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.594501718213059e-05, |
|
"loss": 1.8167, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.58985789913625e-05, |
|
"loss": 1.789, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.585214080059441e-05, |
|
"loss": 1.82, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_gen_len": 18.999775583482943, |
|
"eval_loss": 1.6571003198623657, |
|
"eval_rouge1": 24.546, |
|
"eval_rouge2": 11.7798, |
|
"eval_rougeL": 20.265, |
|
"eval_rougeLsum": 23.1408, |
|
"eval_runtime": 1022.2185, |
|
"eval_samples_per_second": 13.077, |
|
"eval_steps_per_second": 1.635, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.5805702609826322e-05, |
|
"loss": 1.8416, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.575926441905823e-05, |
|
"loss": 1.851, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5712826228290148e-05, |
|
"loss": 1.8012, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.566638803752206e-05, |
|
"loss": 1.8015, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.561994984675397e-05, |
|
"loss": 1.7634, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.557351165598588e-05, |
|
"loss": 1.8303, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.5527073465217797e-05, |
|
"loss": 1.8022, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.548063527444971e-05, |
|
"loss": 1.8292, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.543419708368162e-05, |
|
"loss": 1.7705, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5387758892913533e-05, |
|
"loss": 1.8267, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.655230164527893, |
|
"eval_rouge1": 24.5214, |
|
"eval_rouge2": 11.7368, |
|
"eval_rougeL": 20.2276, |
|
"eval_rougeLsum": 23.1504, |
|
"eval_runtime": 1027.6634, |
|
"eval_samples_per_second": 13.008, |
|
"eval_steps_per_second": 1.626, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5341320702145443e-05, |
|
"loss": 1.8101, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.529488251137736e-05, |
|
"loss": 1.7931, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5248444320609273e-05, |
|
"loss": 1.8394, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5202006129841182e-05, |
|
"loss": 1.8143, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5155567939073092e-05, |
|
"loss": 1.8197, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5109129748305005e-05, |
|
"loss": 1.8011, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.5062691557536922e-05, |
|
"loss": 1.8094, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.501625336676883e-05, |
|
"loss": 1.816, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.4969815176000745e-05, |
|
"loss": 1.8274, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.4923376985232658e-05, |
|
"loss": 1.8063, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_gen_len": 18.999177139437464, |
|
"eval_loss": 1.6588006019592285, |
|
"eval_rouge1": 24.5222, |
|
"eval_rouge2": 11.8209, |
|
"eval_rougeL": 20.2941, |
|
"eval_rougeLsum": 23.1551, |
|
"eval_runtime": 1031.4983, |
|
"eval_samples_per_second": 12.96, |
|
"eval_steps_per_second": 1.62, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.4876938794464568e-05, |
|
"loss": 1.8088, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.483050060369648e-05, |
|
"loss": 1.8148, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.4784062412928394e-05, |
|
"loss": 1.8075, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.4737624222160304e-05, |
|
"loss": 1.8276, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.469118603139222e-05, |
|
"loss": 1.7834, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.464474784062413e-05, |
|
"loss": 1.7928, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.4598309649856043e-05, |
|
"loss": 1.7802, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4551871459087956e-05, |
|
"loss": 1.8131, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4505433268319866e-05, |
|
"loss": 1.7951, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.445899507755178e-05, |
|
"loss": 1.8171, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.656943440437317, |
|
"eval_rouge1": 24.5845, |
|
"eval_rouge2": 11.8182, |
|
"eval_rougeL": 20.3147, |
|
"eval_rougeLsum": 23.1812, |
|
"eval_runtime": 1034.3258, |
|
"eval_samples_per_second": 12.924, |
|
"eval_steps_per_second": 1.616, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.4412556886783692e-05, |
|
"loss": 1.8054, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.4366118696015605e-05, |
|
"loss": 1.8182, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.4319680505247515e-05, |
|
"loss": 1.8019, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.427324231447943e-05, |
|
"loss": 1.7822, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.422680412371134e-05, |
|
"loss": 1.7951, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.418036593294325e-05, |
|
"loss": 1.8077, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.4133927742175168e-05, |
|
"loss": 1.7961, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.4087489551407077e-05, |
|
"loss": 1.7619, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.404105136063899e-05, |
|
"loss": 1.7999, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.3994613169870904e-05, |
|
"loss": 1.7884, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_gen_len": 18.999775583482943, |
|
"eval_loss": 1.6596565246582031, |
|
"eval_rouge1": 24.532, |
|
"eval_rouge2": 11.8057, |
|
"eval_rougeL": 20.2622, |
|
"eval_rougeLsum": 23.1459, |
|
"eval_runtime": 1029.0813, |
|
"eval_samples_per_second": 12.99, |
|
"eval_steps_per_second": 1.624, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.3948174979102817e-05, |
|
"loss": 1.8125, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3901736788334726e-05, |
|
"loss": 1.7802, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.385529859756664e-05, |
|
"loss": 1.7974, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3808860406798553e-05, |
|
"loss": 1.8124, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3762422216030463e-05, |
|
"loss": 1.8032, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.371598402526238e-05, |
|
"loss": 1.7827, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.366954583449429e-05, |
|
"loss": 1.7876, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.3623107643726202e-05, |
|
"loss": 1.7645, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3576669452958115e-05, |
|
"loss": 1.8223, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3530231262190025e-05, |
|
"loss": 1.7588, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_gen_len": 18.999401555954517, |
|
"eval_loss": 1.657245397567749, |
|
"eval_rouge1": 24.6532, |
|
"eval_rouge2": 11.8958, |
|
"eval_rougeL": 20.3877, |
|
"eval_rougeLsum": 23.2776, |
|
"eval_runtime": 1034.2975, |
|
"eval_samples_per_second": 12.925, |
|
"eval_steps_per_second": 1.616, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3483793071421938e-05, |
|
"loss": 1.8284, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.343735488065385e-05, |
|
"loss": 1.7801, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3390916689885764e-05, |
|
"loss": 1.8094, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3344478499117674e-05, |
|
"loss": 1.7872, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.329804030834959e-05, |
|
"loss": 1.8076, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.32516021175815e-05, |
|
"loss": 1.7796, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.320516392681341e-05, |
|
"loss": 1.7982, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.3158725736045326e-05, |
|
"loss": 1.7809, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.3112287545277236e-05, |
|
"loss": 1.8185, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.306584935450915e-05, |
|
"loss": 1.7847, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.6560837030410767, |
|
"eval_rouge1": 24.5483, |
|
"eval_rouge2": 11.856, |
|
"eval_rougeL": 20.3188, |
|
"eval_rougeLsum": 23.1852, |
|
"eval_runtime": 1032.6185, |
|
"eval_samples_per_second": 12.946, |
|
"eval_steps_per_second": 1.618, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.3019411163741062e-05, |
|
"loss": 1.8374, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.2972972972972976e-05, |
|
"loss": 1.7944, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.2926534782204885e-05, |
|
"loss": 1.826, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.28800965914368e-05, |
|
"loss": 1.8044, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.283365840066871e-05, |
|
"loss": 1.8192, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.278722020990062e-05, |
|
"loss": 1.812, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.2740782019132538e-05, |
|
"loss": 1.79, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.2694343828364448e-05, |
|
"loss": 1.7627, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.264790563759636e-05, |
|
"loss": 1.7941, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.2601467446828274e-05, |
|
"loss": 1.8523, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.6583877801895142, |
|
"eval_rouge1": 24.5501, |
|
"eval_rouge2": 11.8666, |
|
"eval_rougeL": 20.3197, |
|
"eval_rougeLsum": 23.1683, |
|
"eval_runtime": 1024.679, |
|
"eval_samples_per_second": 13.046, |
|
"eval_steps_per_second": 1.631, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.2555029256060184e-05, |
|
"loss": 1.7878, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.2508591065292097e-05, |
|
"loss": 1.8199, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.246215287452401e-05, |
|
"loss": 1.8112, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.2415714683755923e-05, |
|
"loss": 1.808, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.2369276492987833e-05, |
|
"loss": 1.8271, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.232283830221975e-05, |
|
"loss": 1.8064, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.227640011145166e-05, |
|
"loss": 1.8218, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.222996192068357e-05, |
|
"loss": 1.7955, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.2183523729915485e-05, |
|
"loss": 1.822, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.2137085539147395e-05, |
|
"loss": 1.7955, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_gen_len": 18.999925194494313, |
|
"eval_loss": 1.6546396017074585, |
|
"eval_rouge1": 24.5126, |
|
"eval_rouge2": 11.8043, |
|
"eval_rougeL": 20.2603, |
|
"eval_rougeLsum": 23.1175, |
|
"eval_runtime": 1034.7782, |
|
"eval_samples_per_second": 12.919, |
|
"eval_steps_per_second": 1.615, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.2090647348379308e-05, |
|
"loss": 1.8306, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.204420915761122e-05, |
|
"loss": 1.8132, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.1997770966843135e-05, |
|
"loss": 1.8131, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.1951332776075044e-05, |
|
"loss": 1.7976, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.1904894585306957e-05, |
|
"loss": 1.7933, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.185845639453887e-05, |
|
"loss": 1.8118, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.181201820377078e-05, |
|
"loss": 1.8073, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.1765580013002697e-05, |
|
"loss": 1.7865, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.1719141822234607e-05, |
|
"loss": 1.7636, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.1672703631466516e-05, |
|
"loss": 1.8215, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.654118299484253, |
|
"eval_rouge1": 24.5884, |
|
"eval_rouge2": 11.8003, |
|
"eval_rougeL": 20.2887, |
|
"eval_rougeLsum": 23.1866, |
|
"eval_runtime": 1027.8853, |
|
"eval_samples_per_second": 13.005, |
|
"eval_steps_per_second": 1.626, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.1626265440698433e-05, |
|
"loss": 1.8436, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.1579827249930343e-05, |
|
"loss": 1.7967, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.1533389059162256e-05, |
|
"loss": 1.7764, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.148695086839417e-05, |
|
"loss": 1.8181, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.1440512677626082e-05, |
|
"loss": 1.7699, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.1394074486857992e-05, |
|
"loss": 1.8442, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.1347636296089905e-05, |
|
"loss": 1.7805, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.1301198105321818e-05, |
|
"loss": 1.8129, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.1254759914553728e-05, |
|
"loss": 1.7628, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1208321723785644e-05, |
|
"loss": 1.7917, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_gen_len": 18.99970077797726, |
|
"eval_loss": 1.6567963361740112, |
|
"eval_rouge1": 24.619, |
|
"eval_rouge2": 11.8868, |
|
"eval_rougeL": 20.3496, |
|
"eval_rougeLsum": 23.2304, |
|
"eval_runtime": 1035.7478, |
|
"eval_samples_per_second": 12.907, |
|
"eval_steps_per_second": 1.613, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1161883533017554e-05, |
|
"loss": 1.7878, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1115445342249467e-05, |
|
"loss": 1.7953, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.106900715148138e-05, |
|
"loss": 1.826, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.102256896071329e-05, |
|
"loss": 1.8013, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.0976130769945203e-05, |
|
"loss": 1.8322, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.0929692579177116e-05, |
|
"loss": 1.7874, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.088325438840903e-05, |
|
"loss": 1.8104, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.083681619764094e-05, |
|
"loss": 1.7578, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.0790378006872856e-05, |
|
"loss": 1.8312, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.0743939816104765e-05, |
|
"loss": 1.7543, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.6570059061050415, |
|
"eval_rouge1": 24.5378, |
|
"eval_rouge2": 11.8192, |
|
"eval_rougeL": 20.2681, |
|
"eval_rougeLsum": 23.1454, |
|
"eval_runtime": 1031.5618, |
|
"eval_samples_per_second": 12.959, |
|
"eval_steps_per_second": 1.62, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.0697501625336675e-05, |
|
"loss": 1.8303, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.065106343456859e-05, |
|
"loss": 1.7995, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.06046252438005e-05, |
|
"loss": 1.814, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.0558187053032415e-05, |
|
"loss": 1.8235, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.0511748862264328e-05, |
|
"loss": 1.8151, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.046531067149624e-05, |
|
"loss": 1.8041, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.041887248072815e-05, |
|
"loss": 1.8169, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.0372434289960064e-05, |
|
"loss": 1.7921, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.0325996099191977e-05, |
|
"loss": 1.8136, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.0279557908423887e-05, |
|
"loss": 1.7978, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_gen_len": 18.99985038898863, |
|
"eval_loss": 1.6541448831558228, |
|
"eval_rouge1": 24.5719, |
|
"eval_rouge2": 11.8446, |
|
"eval_rougeL": 20.2873, |
|
"eval_rougeLsum": 23.1855, |
|
"eval_runtime": 1033.6446, |
|
"eval_samples_per_second": 12.933, |
|
"eval_steps_per_second": 1.617, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0233119717655803e-05, |
|
"loss": 1.799, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0186681526887713e-05, |
|
"loss": 1.8007, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0140243336119626e-05, |
|
"loss": 1.7989, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.009380514535154e-05, |
|
"loss": 1.8069, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.004736695458345e-05, |
|
"loss": 1.7949, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0000928763815362e-05, |
|
"loss": 1.8045, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.9954490573047275e-05, |
|
"loss": 1.8275, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.9908052382279188e-05, |
|
"loss": 1.8041, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.9861614191511098e-05, |
|
"loss": 1.7827, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.9815176000743015e-05, |
|
"loss": 1.8228, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_gen_len": 18.999775583482943, |
|
"eval_loss": 1.6561096906661987, |
|
"eval_rouge1": 24.5193, |
|
"eval_rouge2": 11.8527, |
|
"eval_rougeL": 20.3185, |
|
"eval_rougeLsum": 23.1395, |
|
"eval_runtime": 1033.2498, |
|
"eval_samples_per_second": 12.938, |
|
"eval_steps_per_second": 1.617, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.9768737809974924e-05, |
|
"loss": 1.8076, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.9722299619206834e-05, |
|
"loss": 1.7672, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.967586142843875e-05, |
|
"loss": 1.8212, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.962942323767066e-05, |
|
"loss": 1.8367, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.9582985046902573e-05, |
|
"loss": 1.8581, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.9536546856134487e-05, |
|
"loss": 1.7974, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.94901086653664e-05, |
|
"loss": 1.7626, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.944367047459831e-05, |
|
"loss": 1.7704, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.9397232283830223e-05, |
|
"loss": 1.8347, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.9350794093062136e-05, |
|
"loss": 1.8163, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_gen_len": 18.999775583482943, |
|
"eval_loss": 1.6537002325057983, |
|
"eval_rouge1": 24.4385, |
|
"eval_rouge2": 11.7625, |
|
"eval_rougeL": 20.2042, |
|
"eval_rougeLsum": 23.0671, |
|
"eval_runtime": 1036.124, |
|
"eval_samples_per_second": 12.902, |
|
"eval_steps_per_second": 1.613, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.9304355902294045e-05, |
|
"loss": 1.7862, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.9257917711525962e-05, |
|
"loss": 1.7817, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9211479520757872e-05, |
|
"loss": 1.8224, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9165041329989785e-05, |
|
"loss": 1.808, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9118603139221698e-05, |
|
"loss": 1.7908, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.9072164948453608e-05, |
|
"loss": 1.8254, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.902572675768552e-05, |
|
"loss": 1.7312, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.8979288566917434e-05, |
|
"loss": 1.7895, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.8932850376149347e-05, |
|
"loss": 1.7815, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8886412185381257e-05, |
|
"loss": 1.7868, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_gen_len": 18.999775583482943, |
|
"eval_loss": 1.653245210647583, |
|
"eval_rouge1": 24.4985, |
|
"eval_rouge2": 11.8187, |
|
"eval_rougeL": 20.2775, |
|
"eval_rougeLsum": 23.1426, |
|
"eval_runtime": 1028.3624, |
|
"eval_samples_per_second": 12.999, |
|
"eval_steps_per_second": 1.625, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8839973994613173e-05, |
|
"loss": 1.8127, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8793535803845083e-05, |
|
"loss": 1.8062, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8747097613076993e-05, |
|
"loss": 1.809, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.870065942230891e-05, |
|
"loss": 1.7546, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.865422123154082e-05, |
|
"loss": 1.7803, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8607783040772732e-05, |
|
"loss": 1.8209, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8561344850004645e-05, |
|
"loss": 1.8339, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.851490665923656e-05, |
|
"loss": 1.8038, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.846846846846847e-05, |
|
"loss": 1.806, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.842203027770038e-05, |
|
"loss": 1.8345, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_gen_len": 18.99985038898863, |
|
"eval_loss": 1.6522008180618286, |
|
"eval_rouge1": 24.5375, |
|
"eval_rouge2": 11.8398, |
|
"eval_rougeL": 20.285, |
|
"eval_rougeLsum": 23.1643, |
|
"eval_runtime": 1032.6207, |
|
"eval_samples_per_second": 12.946, |
|
"eval_steps_per_second": 1.618, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.8375592086932295e-05, |
|
"loss": 1.7729, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.8329153896164204e-05, |
|
"loss": 1.7864, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.828271570539612e-05, |
|
"loss": 1.8057, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.823627751462803e-05, |
|
"loss": 1.7886, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8189839323859944e-05, |
|
"loss": 1.7784, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8143401133091857e-05, |
|
"loss": 1.7989, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8096962942323767e-05, |
|
"loss": 1.7866, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.805052475155568e-05, |
|
"loss": 1.8348, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.8004086560787593e-05, |
|
"loss": 1.7391, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.7957648370019506e-05, |
|
"loss": 1.7773, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_gen_len": 18.99985038898863, |
|
"eval_loss": 1.6528586149215698, |
|
"eval_rouge1": 24.4722, |
|
"eval_rouge2": 11.7979, |
|
"eval_rougeL": 20.2636, |
|
"eval_rougeLsum": 23.106, |
|
"eval_runtime": 1032.709, |
|
"eval_samples_per_second": 12.945, |
|
"eval_steps_per_second": 1.618, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7911210179251416e-05, |
|
"loss": 1.8172, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7864771988483332e-05, |
|
"loss": 1.7618, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7818333797715242e-05, |
|
"loss": 1.7885, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7771895606947152e-05, |
|
"loss": 1.7803, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.772545741617907e-05, |
|
"loss": 1.8239, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.7679019225410978e-05, |
|
"loss": 1.8412, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.763258103464289e-05, |
|
"loss": 1.8328, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.7586142843874804e-05, |
|
"loss": 1.8049, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.7539704653106717e-05, |
|
"loss": 1.8353, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.7493266462338627e-05, |
|
"loss": 1.8409, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_gen_len": 18.99985038898863, |
|
"eval_loss": 1.6521377563476562, |
|
"eval_rouge1": 24.4845, |
|
"eval_rouge2": 11.8136, |
|
"eval_rougeL": 20.2557, |
|
"eval_rougeLsum": 23.1089, |
|
"eval_runtime": 1028.0193, |
|
"eval_samples_per_second": 13.004, |
|
"eval_steps_per_second": 1.625, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.744682827157054e-05, |
|
"loss": 1.7909, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.7400390080802453e-05, |
|
"loss": 1.8121, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.7353951890034363e-05, |
|
"loss": 1.814, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.730751369926628e-05, |
|
"loss": 1.7987, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.726107550849819e-05, |
|
"loss": 1.7947, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.72146373177301e-05, |
|
"loss": 1.7807, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7168199126962016e-05, |
|
"loss": 1.8074, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7121760936193926e-05, |
|
"loss": 1.7884, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.707532274542584e-05, |
|
"loss": 1.7609, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.7028884554657752e-05, |
|
"loss": 1.8146, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_gen_len": 18.99985038898863, |
|
"eval_loss": 1.6515237092971802, |
|
"eval_rouge1": 24.4923, |
|
"eval_rouge2": 11.7965, |
|
"eval_rougeL": 20.2521, |
|
"eval_rougeLsum": 23.1247, |
|
"eval_runtime": 1024.8513, |
|
"eval_samples_per_second": 13.044, |
|
"eval_steps_per_second": 1.63, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.6982446363889665e-05, |
|
"loss": 1.7835, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.6936008173121575e-05, |
|
"loss": 1.8227, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.6889569982353488e-05, |
|
"loss": 1.8015, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.68431317915854e-05, |
|
"loss": 1.79, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.679669360081731e-05, |
|
"loss": 1.7974, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.6750255410049227e-05, |
|
"loss": 1.8084, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.6703817219281137e-05, |
|
"loss": 1.8178, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.665737902851305e-05, |
|
"loss": 1.7481, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.6610940837744963e-05, |
|
"loss": 1.7653, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.6564502646976873e-05, |
|
"loss": 1.7466, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.6526038646697998, |
|
"eval_rouge1": 24.4913, |
|
"eval_rouge2": 11.8254, |
|
"eval_rougeL": 20.2562, |
|
"eval_rougeLsum": 23.1266, |
|
"eval_runtime": 1034.5967, |
|
"eval_samples_per_second": 12.921, |
|
"eval_steps_per_second": 1.615, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.6518064456208786e-05, |
|
"loss": 1.7975, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.64716262654407e-05, |
|
"loss": 1.7617, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.6425188074672612e-05, |
|
"loss": 1.7838, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.6378749883904522e-05, |
|
"loss": 1.8457, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.633231169313644e-05, |
|
"loss": 1.7611, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.628587350236835e-05, |
|
"loss": 1.788, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.6239435311600258e-05, |
|
"loss": 1.785, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.6192997120832175e-05, |
|
"loss": 1.8231, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.6146558930064084e-05, |
|
"loss": 1.7751, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.6100120739295998e-05, |
|
"loss": 1.8009, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.6505471467971802, |
|
"eval_rouge1": 24.5231, |
|
"eval_rouge2": 11.8414, |
|
"eval_rougeL": 20.2842, |
|
"eval_rougeLsum": 23.1654, |
|
"eval_runtime": 1023.213, |
|
"eval_samples_per_second": 13.065, |
|
"eval_steps_per_second": 1.633, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.605368254852791e-05, |
|
"loss": 1.8207, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.6007244357759824e-05, |
|
"loss": 1.7569, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.5960806166991734e-05, |
|
"loss": 1.7819, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.5914367976223647e-05, |
|
"loss": 1.7628, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.586792978545556e-05, |
|
"loss": 1.8075, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.582149159468747e-05, |
|
"loss": 1.7719, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.5775053403919386e-05, |
|
"loss": 1.8057, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.5728615213151296e-05, |
|
"loss": 1.7819, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.568217702238321e-05, |
|
"loss": 1.7737, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.5635738831615122e-05, |
|
"loss": 1.7768, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.6516097784042358, |
|
"eval_rouge1": 24.5192, |
|
"eval_rouge2": 11.8206, |
|
"eval_rougeL": 20.2884, |
|
"eval_rougeLsum": 23.1493, |
|
"eval_runtime": 1036.5849, |
|
"eval_samples_per_second": 12.896, |
|
"eval_steps_per_second": 1.612, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.5589300640847032e-05, |
|
"loss": 1.8128, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.5542862450078945e-05, |
|
"loss": 1.8214, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.5496424259310858e-05, |
|
"loss": 1.8027, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.544998606854277e-05, |
|
"loss": 1.7881, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.540354787777468e-05, |
|
"loss": 1.8071, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.5357109687006598e-05, |
|
"loss": 1.7745, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.5310671496238507e-05, |
|
"loss": 1.7855, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.5264233305470417e-05, |
|
"loss": 1.7824, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5217795114702332e-05, |
|
"loss": 1.8059, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5171356923934243e-05, |
|
"loss": 1.7569, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.6540724039077759, |
|
"eval_rouge1": 24.6135, |
|
"eval_rouge2": 11.9135, |
|
"eval_rougeL": 20.3513, |
|
"eval_rougeLsum": 23.2279, |
|
"eval_runtime": 1023.1896, |
|
"eval_samples_per_second": 13.065, |
|
"eval_steps_per_second": 1.633, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5124918733166158e-05, |
|
"loss": 1.8309, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.5078480542398068e-05, |
|
"loss": 1.8047, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.5032042351629983e-05, |
|
"loss": 1.7746, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.4985604160861894e-05, |
|
"loss": 1.8134, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.4939165970093804e-05, |
|
"loss": 1.7825, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.4892727779325719e-05, |
|
"loss": 1.7939, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.484628958855763e-05, |
|
"loss": 1.7539, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.4799851397789543e-05, |
|
"loss": 1.7659, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.4753413207021455e-05, |
|
"loss": 1.7926, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.470697501625337e-05, |
|
"loss": 1.7893, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_gen_len": 18.99970077797726, |
|
"eval_loss": 1.6507411003112793, |
|
"eval_rouge1": 24.5934, |
|
"eval_rouge2": 11.8727, |
|
"eval_rougeL": 20.3305, |
|
"eval_rougeLsum": 23.2106, |
|
"eval_runtime": 1030.6978, |
|
"eval_samples_per_second": 12.97, |
|
"eval_steps_per_second": 1.621, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.466053682548528e-05, |
|
"loss": 1.7752, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.461409863471719e-05, |
|
"loss": 1.8088, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.4567660443949106e-05, |
|
"loss": 1.8084, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.4521222253181015e-05, |
|
"loss": 1.7934, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.447478406241293e-05, |
|
"loss": 1.7928, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.4428345871644842e-05, |
|
"loss": 1.783, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.4381907680876755e-05, |
|
"loss": 1.806, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.4335469490108666e-05, |
|
"loss": 1.7723, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.4289031299340578e-05, |
|
"loss": 1.7634, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.424259310857249e-05, |
|
"loss": 1.763, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_gen_len": 18.999925194494313, |
|
"eval_loss": 1.6511585712432861, |
|
"eval_rouge1": 24.5829, |
|
"eval_rouge2": 11.8543, |
|
"eval_rougeL": 20.3142, |
|
"eval_rougeLsum": 23.2049, |
|
"eval_runtime": 1026.6075, |
|
"eval_samples_per_second": 13.022, |
|
"eval_steps_per_second": 1.628, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4196154917804402e-05, |
|
"loss": 1.7742, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4149716727036317e-05, |
|
"loss": 1.7836, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4103278536268227e-05, |
|
"loss": 1.8182, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4056840345500142e-05, |
|
"loss": 1.7747, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4010402154732053e-05, |
|
"loss": 1.7785, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.3963963963963963e-05, |
|
"loss": 1.8223, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.3917525773195878e-05, |
|
"loss": 1.7769, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.3871087582427789e-05, |
|
"loss": 1.772, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.3824649391659702e-05, |
|
"loss": 1.8052, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.3778211200891614e-05, |
|
"loss": 1.7552, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_gen_len": 18.999775583482943, |
|
"eval_loss": 1.6506332159042358, |
|
"eval_rouge1": 24.5332, |
|
"eval_rouge2": 11.8309, |
|
"eval_rougeL": 20.2795, |
|
"eval_rougeLsum": 23.1654, |
|
"eval_runtime": 1034.7013, |
|
"eval_samples_per_second": 12.92, |
|
"eval_steps_per_second": 1.615, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3731773010123527e-05, |
|
"loss": 1.8099, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3685334819355438e-05, |
|
"loss": 1.8016, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.363889662858735e-05, |
|
"loss": 1.8055, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3592458437819264e-05, |
|
"loss": 1.7729, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3546020247051174e-05, |
|
"loss": 1.7733, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3499582056283089e-05, |
|
"loss": 1.8059, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3453143865515e-05, |
|
"loss": 1.7685, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3406705674746914e-05, |
|
"loss": 1.7758, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3360267483978825e-05, |
|
"loss": 1.8085, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3313829293210736e-05, |
|
"loss": 1.7632, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_gen_len": 18.999476361460204, |
|
"eval_loss": 1.6498222351074219, |
|
"eval_rouge1": 24.5569, |
|
"eval_rouge2": 11.8313, |
|
"eval_rougeL": 20.3158, |
|
"eval_rougeLsum": 23.1808, |
|
"eval_runtime": 1034.8948, |
|
"eval_samples_per_second": 12.917, |
|
"eval_steps_per_second": 1.615, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.326739110244265e-05, |
|
"loss": 1.7951, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.3220952911674561e-05, |
|
"loss": 1.8045, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.3174514720906476e-05, |
|
"loss": 1.8099, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.3128076530138386e-05, |
|
"loss": 1.7945, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.30816383393703e-05, |
|
"loss": 1.7755, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.3035200148602212e-05, |
|
"loss": 1.8063, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.2988761957834122e-05, |
|
"loss": 1.805, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.2942323767066036e-05, |
|
"loss": 1.7553, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.2895885576297948e-05, |
|
"loss": 1.7999, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.2849447385529861e-05, |
|
"loss": 1.8056, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.648803949356079, |
|
"eval_rouge1": 24.6217, |
|
"eval_rouge2": 11.8877, |
|
"eval_rougeL": 20.3555, |
|
"eval_rougeLsum": 23.2514, |
|
"eval_runtime": 1025.8108, |
|
"eval_samples_per_second": 13.032, |
|
"eval_steps_per_second": 1.629, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.2803009194761772e-05, |
|
"loss": 1.8237, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.2756571003993684e-05, |
|
"loss": 1.7823, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.2710132813225597e-05, |
|
"loss": 1.7627, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.2663694622457508e-05, |
|
"loss": 1.8085, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.2617256431689423e-05, |
|
"loss": 1.802, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.2570818240921333e-05, |
|
"loss": 1.7549, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.2524380050153248e-05, |
|
"loss": 1.8025, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.247794185938516e-05, |
|
"loss": 1.7366, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.243150366861707e-05, |
|
"loss": 1.7989, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.2385065477848984e-05, |
|
"loss": 1.8066, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.6494295597076416, |
|
"eval_rouge1": 24.5799, |
|
"eval_rouge2": 11.8515, |
|
"eval_rougeL": 20.3307, |
|
"eval_rougeLsum": 23.2059, |
|
"eval_runtime": 1033.346, |
|
"eval_samples_per_second": 12.937, |
|
"eval_steps_per_second": 1.617, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.2338627287080897e-05, |
|
"loss": 1.7608, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.2292189096312808e-05, |
|
"loss": 1.7725, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.224575090554472e-05, |
|
"loss": 1.8072, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.2199312714776633e-05, |
|
"loss": 1.8053, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.2152874524008545e-05, |
|
"loss": 1.7835, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.2106436333240458e-05, |
|
"loss": 1.7689, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.205999814247237e-05, |
|
"loss": 1.7674, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.2013559951704282e-05, |
|
"loss": 1.7972, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.1967121760936195e-05, |
|
"loss": 1.7737, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.1920683570168107e-05, |
|
"loss": 1.7903, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_gen_len": 18.999775583482943, |
|
"eval_loss": 1.64866042137146, |
|
"eval_rouge1": 24.6151, |
|
"eval_rouge2": 11.889, |
|
"eval_rougeL": 20.3739, |
|
"eval_rougeLsum": 23.2226, |
|
"eval_runtime": 1030.2163, |
|
"eval_samples_per_second": 12.976, |
|
"eval_steps_per_second": 1.622, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.1874245379400018e-05, |
|
"loss": 1.8065, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.1827807188631931e-05, |
|
"loss": 1.7835, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.1781368997863844e-05, |
|
"loss": 1.8129, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1734930807095756e-05, |
|
"loss": 1.7741, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1688492616327669e-05, |
|
"loss": 1.7969, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.164205442555958e-05, |
|
"loss": 1.8015, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1595616234791492e-05, |
|
"loss": 1.8278, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1549178044023405e-05, |
|
"loss": 1.7772, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1502739853255318e-05, |
|
"loss": 1.7987, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.145630166248723e-05, |
|
"loss": 1.805, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6492658853530884, |
|
"eval_rouge1": 24.5739, |
|
"eval_rouge2": 11.8659, |
|
"eval_rougeL": 20.3354, |
|
"eval_rougeLsum": 23.1884, |
|
"eval_runtime": 1027.6519, |
|
"eval_samples_per_second": 13.008, |
|
"eval_steps_per_second": 1.626, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1409863471719143e-05, |
|
"loss": 1.7888, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1363425280951056e-05, |
|
"loss": 1.7929, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1316987090182966e-05, |
|
"loss": 1.7597, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1270548899414879e-05, |
|
"loss": 1.7312, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.1224110708646792e-05, |
|
"loss": 1.8046, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.1177672517878703e-05, |
|
"loss": 1.7843, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.1131234327110617e-05, |
|
"loss": 1.7888, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.108479613634253e-05, |
|
"loss": 1.788, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.1038357945574441e-05, |
|
"loss": 1.7549, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.0991919754806353e-05, |
|
"loss": 1.7843, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6487183570861816, |
|
"eval_rouge1": 24.6125, |
|
"eval_rouge2": 11.8879, |
|
"eval_rougeL": 20.3648, |
|
"eval_rougeLsum": 23.2274, |
|
"eval_runtime": 1035.6996, |
|
"eval_samples_per_second": 12.907, |
|
"eval_steps_per_second": 1.613, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.0945481564038266e-05, |
|
"loss": 1.804, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0899043373270177e-05, |
|
"loss": 1.7363, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.085260518250209e-05, |
|
"loss": 1.766, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0806166991734003e-05, |
|
"loss": 1.7654, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0759728800965915e-05, |
|
"loss": 1.7047, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.0713290610197828e-05, |
|
"loss": 1.8477, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.066685241942974e-05, |
|
"loss": 1.8153, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.062041422866165e-05, |
|
"loss": 1.7948, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0573976037893564e-05, |
|
"loss": 1.7346, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0527537847125477e-05, |
|
"loss": 1.8153, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.6492679119110107, |
|
"eval_rouge1": 24.5638, |
|
"eval_rouge2": 11.8392, |
|
"eval_rougeL": 20.3084, |
|
"eval_rougeLsum": 23.165, |
|
"eval_runtime": 1016.7714, |
|
"eval_samples_per_second": 13.147, |
|
"eval_steps_per_second": 1.643, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0481099656357389e-05, |
|
"loss": 1.7736, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0434661465589302e-05, |
|
"loss": 1.789, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0388223274821215e-05, |
|
"loss": 1.7885, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0341785084053125e-05, |
|
"loss": 1.8059, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0295346893285038e-05, |
|
"loss": 1.7775, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.024890870251695e-05, |
|
"loss": 1.7642, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0202470511748862e-05, |
|
"loss": 1.8427, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0156032320980775e-05, |
|
"loss": 1.8049, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0109594130212689e-05, |
|
"loss": 1.8209, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.00631559394446e-05, |
|
"loss": 1.7581, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.649044156074524, |
|
"eval_rouge1": 24.6121, |
|
"eval_rouge2": 11.8876, |
|
"eval_rougeL": 20.36, |
|
"eval_rougeLsum": 23.2163, |
|
"eval_runtime": 1017.7889, |
|
"eval_samples_per_second": 13.134, |
|
"eval_steps_per_second": 1.642, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0016717748676511e-05, |
|
"loss": 1.755, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.970279557908425e-06, |
|
"loss": 1.7848, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.923841367140336e-06, |
|
"loss": 1.755, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.877403176372249e-06, |
|
"loss": 1.7571, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.830964985604162e-06, |
|
"loss": 1.7997, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.784526794836074e-06, |
|
"loss": 1.7845, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.738088604067985e-06, |
|
"loss": 1.7804, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.691650413299898e-06, |
|
"loss": 1.8016, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.64521222253181e-06, |
|
"loss": 1.8139, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.598774031763723e-06, |
|
"loss": 1.6925, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_gen_len": 18.999775583482943, |
|
"eval_loss": 1.6501575708389282, |
|
"eval_rouge1": 24.6192, |
|
"eval_rouge2": 11.8992, |
|
"eval_rougeL": 20.3786, |
|
"eval_rougeLsum": 23.2421, |
|
"eval_runtime": 1018.5996, |
|
"eval_samples_per_second": 13.124, |
|
"eval_steps_per_second": 1.64, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.552335840995636e-06, |
|
"loss": 1.7919, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.505897650227547e-06, |
|
"loss": 1.7798, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.45945945945946e-06, |
|
"loss": 1.7735, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.413021268691372e-06, |
|
"loss": 1.7761, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.366583077923283e-06, |
|
"loss": 1.7919, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.320144887155197e-06, |
|
"loss": 1.7737, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.27370669638711e-06, |
|
"loss": 1.776, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.227268505619021e-06, |
|
"loss": 1.7919, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.180830314850934e-06, |
|
"loss": 1.7506, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.134392124082847e-06, |
|
"loss": 1.7535, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6472678184509277, |
|
"eval_rouge1": 24.6134, |
|
"eval_rouge2": 11.8877, |
|
"eval_rougeL": 20.3663, |
|
"eval_rougeLsum": 23.2262, |
|
"eval_runtime": 1017.7072, |
|
"eval_samples_per_second": 13.135, |
|
"eval_steps_per_second": 1.642, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.087953933314757e-06, |
|
"loss": 1.8082, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.04151574254667e-06, |
|
"loss": 1.763, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 8.995077551778583e-06, |
|
"loss": 1.7831, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 8.948639361010495e-06, |
|
"loss": 1.7592, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.902201170242408e-06, |
|
"loss": 1.7907, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.855762979474321e-06, |
|
"loss": 1.7634, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.809324788706233e-06, |
|
"loss": 1.7956, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.762886597938144e-06, |
|
"loss": 1.7781, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.716448407170057e-06, |
|
"loss": 1.7713, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.670010216401969e-06, |
|
"loss": 1.751, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6496267318725586, |
|
"eval_rouge1": 24.5728, |
|
"eval_rouge2": 11.8886, |
|
"eval_rougeL": 20.3411, |
|
"eval_rougeLsum": 23.1906, |
|
"eval_runtime": 1018.4137, |
|
"eval_samples_per_second": 13.126, |
|
"eval_steps_per_second": 1.641, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.623572025633882e-06, |
|
"loss": 1.7497, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.577133834865795e-06, |
|
"loss": 1.7948, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.530695644097706e-06, |
|
"loss": 1.7926, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.48425745332962e-06, |
|
"loss": 1.7706, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.437819262561531e-06, |
|
"loss": 1.7496, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.391381071793442e-06, |
|
"loss": 1.8102, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.344942881025355e-06, |
|
"loss": 1.7853, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.298504690257269e-06, |
|
"loss": 1.8141, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.25206649948918e-06, |
|
"loss": 1.8381, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.205628308721093e-06, |
|
"loss": 1.7577, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6477200984954834, |
|
"eval_rouge1": 24.5616, |
|
"eval_rouge2": 11.8489, |
|
"eval_rougeL": 20.3021, |
|
"eval_rougeLsum": 23.1754, |
|
"eval_runtime": 1017.6231, |
|
"eval_samples_per_second": 13.136, |
|
"eval_steps_per_second": 1.642, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.159190117953006e-06, |
|
"loss": 1.8061, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.112751927184916e-06, |
|
"loss": 1.7683, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.06631373641683e-06, |
|
"loss": 1.8018, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.019875545648742e-06, |
|
"loss": 1.7921, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 7.973437354880654e-06, |
|
"loss": 1.7881, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 7.926999164112567e-06, |
|
"loss": 1.7732, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.88056097334448e-06, |
|
"loss": 1.811, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.834122782576391e-06, |
|
"loss": 1.7783, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.787684591808303e-06, |
|
"loss": 1.7847, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.741246401040216e-06, |
|
"loss": 1.8, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6473010778427124, |
|
"eval_rouge1": 24.5614, |
|
"eval_rouge2": 11.8663, |
|
"eval_rougeL": 20.3282, |
|
"eval_rougeLsum": 23.1868, |
|
"eval_runtime": 1018.187, |
|
"eval_samples_per_second": 13.129, |
|
"eval_steps_per_second": 1.641, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.694808210272127e-06, |
|
"loss": 1.8041, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.64837001950404e-06, |
|
"loss": 1.8265, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.601931828735953e-06, |
|
"loss": 1.782, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.555493637967866e-06, |
|
"loss": 1.7359, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.509055447199777e-06, |
|
"loss": 1.7759, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.462617256431689e-06, |
|
"loss": 1.8017, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.416179065663602e-06, |
|
"loss": 1.7952, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.369740874895514e-06, |
|
"loss": 1.8326, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.323302684127427e-06, |
|
"loss": 1.7451, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.27686449335934e-06, |
|
"loss": 1.7859, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_gen_len": 18.999775583482943, |
|
"eval_loss": 1.64827299118042, |
|
"eval_rouge1": 24.5594, |
|
"eval_rouge2": 11.8426, |
|
"eval_rougeL": 20.3197, |
|
"eval_rougeLsum": 23.191, |
|
"eval_runtime": 1017.4363, |
|
"eval_samples_per_second": 13.139, |
|
"eval_steps_per_second": 1.642, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.230426302591252e-06, |
|
"loss": 1.7739, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.1839881118231635e-06, |
|
"loss": 1.7942, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.137549921055076e-06, |
|
"loss": 1.7971, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.091111730286988e-06, |
|
"loss": 1.8096, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.0446735395189e-06, |
|
"loss": 1.7967, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 6.9982353487508135e-06, |
|
"loss": 1.7789, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 6.951797157982726e-06, |
|
"loss": 1.8053, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.905358967214638e-06, |
|
"loss": 1.7866, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.8589207764465495e-06, |
|
"loss": 1.7849, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.812482585678462e-06, |
|
"loss": 1.7984, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_gen_len": 18.999775583482943, |
|
"eval_loss": 1.6468589305877686, |
|
"eval_rouge1": 24.5732, |
|
"eval_rouge2": 11.8258, |
|
"eval_rougeL": 20.3204, |
|
"eval_rougeLsum": 23.1958, |
|
"eval_runtime": 1017.2834, |
|
"eval_samples_per_second": 13.141, |
|
"eval_steps_per_second": 1.643, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.766044394910374e-06, |
|
"loss": 1.7749, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.719606204142287e-06, |
|
"loss": 1.7922, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.6731680133741995e-06, |
|
"loss": 1.7375, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.626729822606112e-06, |
|
"loss": 1.7701, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.580291631838025e-06, |
|
"loss": 1.7792, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.5338534410699355e-06, |
|
"loss": 1.7739, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.487415250301848e-06, |
|
"loss": 1.7358, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.440977059533761e-06, |
|
"loss": 1.7957, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.394538868765673e-06, |
|
"loss": 1.8148, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.3481006779975855e-06, |
|
"loss": 1.7943, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6477123498916626, |
|
"eval_rouge1": 24.5888, |
|
"eval_rouge2": 11.8602, |
|
"eval_rougeL": 20.3352, |
|
"eval_rougeLsum": 23.2181, |
|
"eval_runtime": 1018.432, |
|
"eval_samples_per_second": 13.126, |
|
"eval_steps_per_second": 1.641, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.301662487229499e-06, |
|
"loss": 1.7667, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.255224296461411e-06, |
|
"loss": 1.8027, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.208786105693322e-06, |
|
"loss": 1.7746, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.1623479149252355e-06, |
|
"loss": 1.7585, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.115909724157147e-06, |
|
"loss": 1.7668, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.069471533389059e-06, |
|
"loss": 1.7702, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.023033342620972e-06, |
|
"loss": 1.7837, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 5.976595151852884e-06, |
|
"loss": 1.8103, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 5.930156961084796e-06, |
|
"loss": 1.7929, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.883718770316709e-06, |
|
"loss": 1.7888, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.647167682647705, |
|
"eval_rouge1": 24.5781, |
|
"eval_rouge2": 11.844, |
|
"eval_rougeL": 20.3272, |
|
"eval_rougeLsum": 23.216, |
|
"eval_runtime": 1019.3426, |
|
"eval_samples_per_second": 13.114, |
|
"eval_steps_per_second": 1.639, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.837280579548621e-06, |
|
"loss": 1.7946, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.790842388780533e-06, |
|
"loss": 1.8053, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.744404198012446e-06, |
|
"loss": 1.7827, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.697966007244358e-06, |
|
"loss": 1.7985, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.65152781647627e-06, |
|
"loss": 1.7939, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.605089625708183e-06, |
|
"loss": 1.7877, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.558651434940095e-06, |
|
"loss": 1.7493, |
|
"step": 95700 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.512213244172007e-06, |
|
"loss": 1.7465, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.46577505340392e-06, |
|
"loss": 1.7958, |
|
"step": 95900 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.419336862635832e-06, |
|
"loss": 1.7803, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.6482892036437988, |
|
"eval_rouge1": 24.5454, |
|
"eval_rouge2": 11.8245, |
|
"eval_rougeL": 20.2917, |
|
"eval_rougeLsum": 23.1727, |
|
"eval_runtime": 1025.0182, |
|
"eval_samples_per_second": 13.042, |
|
"eval_steps_per_second": 1.63, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.372898671867744e-06, |
|
"loss": 1.7544, |
|
"step": 96100 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.326460481099657e-06, |
|
"loss": 1.8355, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.280022290331569e-06, |
|
"loss": 1.7922, |
|
"step": 96300 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.233584099563481e-06, |
|
"loss": 1.7703, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.1871459087953935e-06, |
|
"loss": 1.7628, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.140707718027306e-06, |
|
"loss": 1.8141, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.094269527259218e-06, |
|
"loss": 1.7856, |
|
"step": 96700 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.047831336491131e-06, |
|
"loss": 1.8083, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.001393145723043e-06, |
|
"loss": 1.773, |
|
"step": 96900 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 4.954954954954955e-06, |
|
"loss": 1.8106, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6460872888565063, |
|
"eval_rouge1": 24.5694, |
|
"eval_rouge2": 11.8344, |
|
"eval_rougeL": 20.3123, |
|
"eval_rougeLsum": 23.1934, |
|
"eval_runtime": 1025.7089, |
|
"eval_samples_per_second": 13.033, |
|
"eval_steps_per_second": 1.629, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.908516764186868e-06, |
|
"loss": 1.7935, |
|
"step": 97100 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.8620785734187795e-06, |
|
"loss": 1.8337, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.815640382650692e-06, |
|
"loss": 1.83, |
|
"step": 97300 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.769202191882605e-06, |
|
"loss": 1.8323, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.722764001114516e-06, |
|
"loss": 1.7592, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.676325810346429e-06, |
|
"loss": 1.8037, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.629887619578342e-06, |
|
"loss": 1.765, |
|
"step": 97700 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.583449428810254e-06, |
|
"loss": 1.7744, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.5370112380421655e-06, |
|
"loss": 1.8041, |
|
"step": 97900 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.490573047274079e-06, |
|
"loss": 1.8713, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6453874111175537, |
|
"eval_rouge1": 24.5906, |
|
"eval_rouge2": 11.8573, |
|
"eval_rougeL": 20.3447, |
|
"eval_rougeLsum": 23.2181, |
|
"eval_runtime": 1023.9699, |
|
"eval_samples_per_second": 13.055, |
|
"eval_steps_per_second": 1.632, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.444134856505991e-06, |
|
"loss": 1.7536, |
|
"step": 98100 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.397696665737902e-06, |
|
"loss": 1.8293, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.3512584749698155e-06, |
|
"loss": 1.7953, |
|
"step": 98300 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.304820284201728e-06, |
|
"loss": 1.7937, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.25838209343364e-06, |
|
"loss": 1.7473, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.211943902665552e-06, |
|
"loss": 1.7745, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.165505711897465e-06, |
|
"loss": 1.815, |
|
"step": 98700 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.119067521129377e-06, |
|
"loss": 1.7747, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.072629330361289e-06, |
|
"loss": 1.7989, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.0261911395932016e-06, |
|
"loss": 1.7655, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.6468464136123657, |
|
"eval_rouge1": 24.5709, |
|
"eval_rouge2": 11.8573, |
|
"eval_rougeL": 20.3139, |
|
"eval_rougeLsum": 23.1994, |
|
"eval_runtime": 1022.5566, |
|
"eval_samples_per_second": 13.073, |
|
"eval_steps_per_second": 1.634, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.979752948825114e-06, |
|
"loss": 1.7739, |
|
"step": 99100 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.933314758057027e-06, |
|
"loss": 1.7992, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.886876567288938e-06, |
|
"loss": 1.7752, |
|
"step": 99300 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.840438376520851e-06, |
|
"loss": 1.7836, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.7940001857527634e-06, |
|
"loss": 1.7634, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.7475619949846753e-06, |
|
"loss": 1.7795, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.701123804216588e-06, |
|
"loss": 1.7955, |
|
"step": 99700 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.6546856134485003e-06, |
|
"loss": 1.7791, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.608247422680412e-06, |
|
"loss": 1.8135, |
|
"step": 99900 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.561809231912325e-06, |
|
"loss": 1.7616, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_gen_len": 18.999775583482943, |
|
"eval_loss": 1.6464020013809204, |
|
"eval_rouge1": 24.5852, |
|
"eval_rouge2": 11.8531, |
|
"eval_rougeL": 20.3172, |
|
"eval_rougeLsum": 23.2089, |
|
"eval_runtime": 1032.8825, |
|
"eval_samples_per_second": 12.942, |
|
"eval_steps_per_second": 1.618, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.515371041144237e-06, |
|
"loss": 1.7835, |
|
"step": 100100 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.46893285037615e-06, |
|
"loss": 1.8031, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.4224946596080617e-06, |
|
"loss": 1.8073, |
|
"step": 100300 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.376056468839974e-06, |
|
"loss": 1.8209, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.3296182780718867e-06, |
|
"loss": 1.763, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.2831800873037986e-06, |
|
"loss": 1.7552, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.236741896535711e-06, |
|
"loss": 1.7884, |
|
"step": 100700 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.1903037057676236e-06, |
|
"loss": 1.7717, |
|
"step": 100800 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.1438655149995363e-06, |
|
"loss": 1.7844, |
|
"step": 100900 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.097427324231448e-06, |
|
"loss": 1.7581, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_gen_len": 18.99970077797726, |
|
"eval_loss": 1.6467525959014893, |
|
"eval_rouge1": 24.5748, |
|
"eval_rouge2": 11.8452, |
|
"eval_rougeL": 20.3043, |
|
"eval_rougeLsum": 23.1849, |
|
"eval_runtime": 1030.0572, |
|
"eval_samples_per_second": 12.978, |
|
"eval_steps_per_second": 1.622, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.0509891334633604e-06, |
|
"loss": 1.7973, |
|
"step": 101100 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.0045509426952727e-06, |
|
"loss": 1.7992, |
|
"step": 101200 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.958112751927185e-06, |
|
"loss": 1.7591, |
|
"step": 101300 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.9116745611590973e-06, |
|
"loss": 1.7999, |
|
"step": 101400 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.86523637039101e-06, |
|
"loss": 1.8001, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.818798179622922e-06, |
|
"loss": 1.8029, |
|
"step": 101600 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.772359988854834e-06, |
|
"loss": 1.7549, |
|
"step": 101700 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.725921798086747e-06, |
|
"loss": 1.7741, |
|
"step": 101800 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.6794836073186587e-06, |
|
"loss": 1.7553, |
|
"step": 101900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.6330454165505714e-06, |
|
"loss": 1.7743, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.6462187767028809, |
|
"eval_rouge1": 24.5665, |
|
"eval_rouge2": 11.8328, |
|
"eval_rougeL": 20.2992, |
|
"eval_rougeLsum": 23.1896, |
|
"eval_runtime": 1029.3539, |
|
"eval_samples_per_second": 12.987, |
|
"eval_steps_per_second": 1.623, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.5866072257824837e-06, |
|
"loss": 1.7841, |
|
"step": 102100 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.540169035014396e-06, |
|
"loss": 1.7552, |
|
"step": 102200 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.4937308442463083e-06, |
|
"loss": 1.7752, |
|
"step": 102300 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.4472926534782206e-06, |
|
"loss": 1.799, |
|
"step": 102400 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.400854462710133e-06, |
|
"loss": 1.7874, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.354416271942045e-06, |
|
"loss": 1.7632, |
|
"step": 102600 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.307978081173958e-06, |
|
"loss": 1.7783, |
|
"step": 102700 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.2615398904058697e-06, |
|
"loss": 1.7644, |
|
"step": 102800 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.215101699637782e-06, |
|
"loss": 1.8257, |
|
"step": 102900 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.1686635088696947e-06, |
|
"loss": 1.78, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_gen_len": 18.99955116696589, |
|
"eval_loss": 1.6458466053009033, |
|
"eval_rouge1": 24.5716, |
|
"eval_rouge2": 11.8399, |
|
"eval_rougeL": 20.31, |
|
"eval_rougeLsum": 23.1943, |
|
"eval_runtime": 1026.5447, |
|
"eval_samples_per_second": 13.022, |
|
"eval_steps_per_second": 1.628, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.1222253181016066e-06, |
|
"loss": 1.7643, |
|
"step": 103100 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.0757871273335193e-06, |
|
"loss": 1.7706, |
|
"step": 103200 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.0293489365654316e-06, |
|
"loss": 1.8195, |
|
"step": 103300 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.982910745797344e-06, |
|
"loss": 1.8257, |
|
"step": 103400 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.936472555029256e-06, |
|
"loss": 1.8226, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.8900343642611683e-06, |
|
"loss": 1.7884, |
|
"step": 103600 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.843596173493081e-06, |
|
"loss": 1.8019, |
|
"step": 103700 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.797157982724993e-06, |
|
"loss": 1.767, |
|
"step": 103800 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.7507197919569055e-06, |
|
"loss": 1.8028, |
|
"step": 103900 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7042816011888178e-06, |
|
"loss": 1.8162, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6455563306808472, |
|
"eval_rouge1": 24.5719, |
|
"eval_rouge2": 11.8358, |
|
"eval_rougeL": 20.3132, |
|
"eval_rougeLsum": 23.1921, |
|
"eval_runtime": 1021.2047, |
|
"eval_samples_per_second": 13.09, |
|
"eval_steps_per_second": 1.636, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.65784341042073e-06, |
|
"loss": 1.8019, |
|
"step": 104100 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.6114052196526424e-06, |
|
"loss": 1.7694, |
|
"step": 104200 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.5649670288845547e-06, |
|
"loss": 1.7769, |
|
"step": 104300 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.518528838116467e-06, |
|
"loss": 1.7884, |
|
"step": 104400 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.4720906473483793e-06, |
|
"loss": 1.8234, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.4256524565802918e-06, |
|
"loss": 1.7983, |
|
"step": 104600 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.379214265812204e-06, |
|
"loss": 1.7418, |
|
"step": 104700 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.3327760750441163e-06, |
|
"loss": 1.7558, |
|
"step": 104800 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.2863378842760288e-06, |
|
"loss": 1.7995, |
|
"step": 104900 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.239899693507941e-06, |
|
"loss": 1.7862, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6462281942367554, |
|
"eval_rouge1": 24.5938, |
|
"eval_rouge2": 11.8624, |
|
"eval_rougeL": 20.337, |
|
"eval_rougeLsum": 23.2131, |
|
"eval_runtime": 1018.9945, |
|
"eval_samples_per_second": 13.119, |
|
"eval_steps_per_second": 1.64, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.1934615027398532e-06, |
|
"loss": 1.7785, |
|
"step": 105100 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.1470233119717657e-06, |
|
"loss": 1.7551, |
|
"step": 105200 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.100585121203678e-06, |
|
"loss": 1.7387, |
|
"step": 105300 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0541469304355903e-06, |
|
"loss": 1.7499, |
|
"step": 105400 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0077087396675028e-06, |
|
"loss": 1.7999, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 9.612705488994148e-07, |
|
"loss": 1.7999, |
|
"step": 105600 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 9.148323581313272e-07, |
|
"loss": 1.7517, |
|
"step": 105700 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.683941673632395e-07, |
|
"loss": 1.788, |
|
"step": 105800 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.219559765951519e-07, |
|
"loss": 1.7931, |
|
"step": 105900 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 7.755177858270642e-07, |
|
"loss": 1.7995, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6458855867385864, |
|
"eval_rouge1": 24.5885, |
|
"eval_rouge2": 11.8606, |
|
"eval_rougeL": 20.3325, |
|
"eval_rougeLsum": 23.2137, |
|
"eval_runtime": 1019.094, |
|
"eval_samples_per_second": 13.118, |
|
"eval_steps_per_second": 1.64, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.290795950589765e-07, |
|
"loss": 1.7883, |
|
"step": 106100 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.826414042908889e-07, |
|
"loss": 1.8355, |
|
"step": 106200 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.362032135228012e-07, |
|
"loss": 1.7487, |
|
"step": 106300 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 5.897650227547135e-07, |
|
"loss": 1.7886, |
|
"step": 106400 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.433268319866259e-07, |
|
"loss": 1.8009, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.968886412185381e-07, |
|
"loss": 1.7814, |
|
"step": 106600 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.504504504504505e-07, |
|
"loss": 1.7931, |
|
"step": 106700 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.040122596823628e-07, |
|
"loss": 1.7789, |
|
"step": 106800 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.575740689142751e-07, |
|
"loss": 1.7562, |
|
"step": 106900 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.1113587814618745e-07, |
|
"loss": 1.7559, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_gen_len": 18.999625972471573, |
|
"eval_loss": 1.6453537940979004, |
|
"eval_rouge1": 24.593, |
|
"eval_rouge2": 11.861, |
|
"eval_rougeL": 20.3401, |
|
"eval_rougeLsum": 23.2188, |
|
"eval_runtime": 1019.1949, |
|
"eval_samples_per_second": 13.116, |
|
"eval_steps_per_second": 1.64, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.6469768737809974e-07, |
|
"loss": 1.7975, |
|
"step": 107100 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.182594966100121e-07, |
|
"loss": 1.7641, |
|
"step": 107200 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.718213058419244e-07, |
|
"loss": 1.7987, |
|
"step": 107300 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.2538311507383673e-07, |
|
"loss": 1.7663, |
|
"step": 107400 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 7.894492430574905e-08, |
|
"loss": 1.8012, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.2506733537661375e-08, |
|
"loss": 1.7603, |
|
"step": 107600 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 107670, |
|
"total_flos": 2.3279391489785856e+17, |
|
"train_loss": 0.2034699499612622, |
|
"train_runtime": 19944.6617, |
|
"train_samples_per_second": 43.186, |
|
"train_steps_per_second": 5.398 |
|
} |
|
], |
|
"max_steps": 107670, |
|
"num_train_epochs": 3, |
|
"total_flos": 2.3279391489785856e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|