|
{ |
|
"best_metric": 3.4803478717803955, |
|
"best_model_checkpoint": "opus-mt-zh-en-finetuned-zh-to-en/checkpoint-2", |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 5.6918, |
|
"eval_gen_len": 57.3333, |
|
"eval_loss": 3.5217063426971436, |
|
"eval_runtime": 10.9763, |
|
"eval_samples_per_second": 0.273, |
|
"eval_steps_per_second": 0.091, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 7.6304, |
|
"eval_gen_len": 55.3333, |
|
"eval_loss": 3.4803478717803955, |
|
"eval_runtime": 8.3555, |
|
"eval_samples_per_second": 0.359, |
|
"eval_steps_per_second": 0.12, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 7.9453, |
|
"eval_gen_len": 56.0, |
|
"eval_loss": 3.491027593612671, |
|
"eval_runtime": 9.8597, |
|
"eval_samples_per_second": 0.304, |
|
"eval_steps_per_second": 0.101, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 6.5495, |
|
"eval_gen_len": 79.0, |
|
"eval_loss": 3.5166053771972656, |
|
"eval_runtime": 13.007, |
|
"eval_samples_per_second": 0.231, |
|
"eval_steps_per_second": 0.077, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 6.6932, |
|
"eval_gen_len": 80.0, |
|
"eval_loss": 3.5517735481262207, |
|
"eval_runtime": 14.9524, |
|
"eval_samples_per_second": 0.201, |
|
"eval_steps_per_second": 0.067, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 5.9333, |
|
"eval_gen_len": 137.0, |
|
"eval_loss": 3.5805959701538086, |
|
"eval_runtime": 61.8901, |
|
"eval_samples_per_second": 0.048, |
|
"eval_steps_per_second": 0.016, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 5.6605, |
|
"eval_gen_len": 133.6667, |
|
"eval_loss": 3.6102888584136963, |
|
"eval_runtime": 55.757, |
|
"eval_samples_per_second": 0.054, |
|
"eval_steps_per_second": 0.018, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 7.1686, |
|
"eval_gen_len": 135.3333, |
|
"eval_loss": 3.645458936691284, |
|
"eval_runtime": 52.3901, |
|
"eval_samples_per_second": 0.057, |
|
"eval_steps_per_second": 0.019, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 9.8037, |
|
"eval_gen_len": 146.0, |
|
"eval_loss": 3.6737775802612305, |
|
"eval_runtime": 50.7208, |
|
"eval_samples_per_second": 0.059, |
|
"eval_steps_per_second": 0.02, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 9.9552, |
|
"eval_gen_len": 139.0, |
|
"eval_loss": 3.707125186920166, |
|
"eval_runtime": 49.5408, |
|
"eval_samples_per_second": 0.061, |
|
"eval_steps_per_second": 0.02, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 10.311, |
|
"eval_gen_len": 134.0, |
|
"eval_loss": 3.7424490451812744, |
|
"eval_runtime": 45.4229, |
|
"eval_samples_per_second": 0.066, |
|
"eval_steps_per_second": 0.022, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 10.1929, |
|
"eval_gen_len": 131.6667, |
|
"eval_loss": 3.786301612854004, |
|
"eval_runtime": 45.6583, |
|
"eval_samples_per_second": 0.066, |
|
"eval_steps_per_second": 0.022, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 9.8671, |
|
"eval_gen_len": 119.3333, |
|
"eval_loss": 3.825416088104248, |
|
"eval_runtime": 44.3396, |
|
"eval_samples_per_second": 0.068, |
|
"eval_steps_per_second": 0.023, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 11.4259, |
|
"eval_gen_len": 107.3333, |
|
"eval_loss": 3.866556167602539, |
|
"eval_runtime": 60.118, |
|
"eval_samples_per_second": 0.05, |
|
"eval_steps_per_second": 0.017, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 11.4172, |
|
"eval_gen_len": 100.6667, |
|
"eval_loss": 3.900843620300293, |
|
"eval_runtime": 56.7929, |
|
"eval_samples_per_second": 0.053, |
|
"eval_steps_per_second": 0.018, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 10.8738, |
|
"eval_gen_len": 117.6667, |
|
"eval_loss": 3.935436964035034, |
|
"eval_runtime": 38.5973, |
|
"eval_samples_per_second": 0.078, |
|
"eval_steps_per_second": 0.026, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 10.7717, |
|
"eval_gen_len": 117.0, |
|
"eval_loss": 3.9768829345703125, |
|
"eval_runtime": 36.4507, |
|
"eval_samples_per_second": 0.082, |
|
"eval_steps_per_second": 0.027, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 9.9034, |
|
"eval_gen_len": 118.6667, |
|
"eval_loss": 4.017246246337891, |
|
"eval_runtime": 36.6426, |
|
"eval_samples_per_second": 0.082, |
|
"eval_steps_per_second": 0.027, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bleu": 10.0376, |
|
"eval_gen_len": 121.6667, |
|
"eval_loss": 4.05275821685791, |
|
"eval_runtime": 34.9881, |
|
"eval_samples_per_second": 0.086, |
|
"eval_steps_per_second": 0.029, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_bleu": 8.6054, |
|
"eval_gen_len": 61.0, |
|
"eval_loss": 4.080726623535156, |
|
"eval_runtime": 9.9523, |
|
"eval_samples_per_second": 0.301, |
|
"eval_steps_per_second": 0.1, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_bleu": 10.9496, |
|
"eval_gen_len": 67.0, |
|
"eval_loss": 4.109140396118164, |
|
"eval_runtime": 11.0517, |
|
"eval_samples_per_second": 0.271, |
|
"eval_steps_per_second": 0.09, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_bleu": 10.8268, |
|
"eval_gen_len": 65.6667, |
|
"eval_loss": 4.133188247680664, |
|
"eval_runtime": 10.5163, |
|
"eval_samples_per_second": 0.285, |
|
"eval_steps_per_second": 0.095, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_bleu": 10.5556, |
|
"eval_gen_len": 63.6667, |
|
"eval_loss": 4.158883571624756, |
|
"eval_runtime": 9.4741, |
|
"eval_samples_per_second": 0.317, |
|
"eval_steps_per_second": 0.106, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_bleu": 10.8678, |
|
"eval_gen_len": 62.0, |
|
"eval_loss": 4.181970119476318, |
|
"eval_runtime": 10.5606, |
|
"eval_samples_per_second": 0.284, |
|
"eval_steps_per_second": 0.095, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_bleu": 10.8678, |
|
"eval_gen_len": 61.6667, |
|
"eval_loss": 4.196631908416748, |
|
"eval_runtime": 9.7369, |
|
"eval_samples_per_second": 0.308, |
|
"eval_steps_per_second": 0.103, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_bleu": 10.27, |
|
"eval_gen_len": 61.6667, |
|
"eval_loss": 4.209479808807373, |
|
"eval_runtime": 9.9007, |
|
"eval_samples_per_second": 0.303, |
|
"eval_steps_per_second": 0.101, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_bleu": 10.6577, |
|
"eval_gen_len": 60.0, |
|
"eval_loss": 4.220566272735596, |
|
"eval_runtime": 9.9798, |
|
"eval_samples_per_second": 0.301, |
|
"eval_steps_per_second": 0.1, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_bleu": 10.2137, |
|
"eval_gen_len": 60.6667, |
|
"eval_loss": 4.2330474853515625, |
|
"eval_runtime": 11.0027, |
|
"eval_samples_per_second": 0.273, |
|
"eval_steps_per_second": 0.091, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_bleu": 10.0108, |
|
"eval_gen_len": 62.0, |
|
"eval_loss": 4.245401382446289, |
|
"eval_runtime": 10.7321, |
|
"eval_samples_per_second": 0.28, |
|
"eval_steps_per_second": 0.093, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_bleu": 10.6984, |
|
"eval_gen_len": 60.0, |
|
"eval_loss": 4.257299423217773, |
|
"eval_runtime": 9.8765, |
|
"eval_samples_per_second": 0.304, |
|
"eval_steps_per_second": 0.101, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_bleu": 9.6494, |
|
"eval_gen_len": 83.0, |
|
"eval_loss": 4.2664995193481445, |
|
"eval_runtime": 13.2171, |
|
"eval_samples_per_second": 0.227, |
|
"eval_steps_per_second": 0.076, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_bleu": 9.7503, |
|
"eval_gen_len": 84.3333, |
|
"eval_loss": 4.276342868804932, |
|
"eval_runtime": 12.9373, |
|
"eval_samples_per_second": 0.232, |
|
"eval_steps_per_second": 0.077, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_bleu": 9.7503, |
|
"eval_gen_len": 84.3333, |
|
"eval_loss": 4.287381649017334, |
|
"eval_runtime": 12.9473, |
|
"eval_samples_per_second": 0.232, |
|
"eval_steps_per_second": 0.077, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_bleu": 9.7503, |
|
"eval_gen_len": 84.3333, |
|
"eval_loss": 4.295342445373535, |
|
"eval_runtime": 13.0533, |
|
"eval_samples_per_second": 0.23, |
|
"eval_steps_per_second": 0.077, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_bleu": 9.7503, |
|
"eval_gen_len": 84.3333, |
|
"eval_loss": 4.301455020904541, |
|
"eval_runtime": 13.2102, |
|
"eval_samples_per_second": 0.227, |
|
"eval_steps_per_second": 0.076, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_bleu": 9.4923, |
|
"eval_gen_len": 70.0, |
|
"eval_loss": 4.303869247436523, |
|
"eval_runtime": 105.6574, |
|
"eval_samples_per_second": 0.028, |
|
"eval_steps_per_second": 0.009, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_bleu": 9.7956, |
|
"eval_gen_len": 64.6667, |
|
"eval_loss": 4.308076858520508, |
|
"eval_runtime": 86.8335, |
|
"eval_samples_per_second": 0.035, |
|
"eval_steps_per_second": 0.012, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_bleu": 10.3592, |
|
"eval_gen_len": 70.6667, |
|
"eval_loss": 4.311928749084473, |
|
"eval_runtime": 10.4025, |
|
"eval_samples_per_second": 0.288, |
|
"eval_steps_per_second": 0.096, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_bleu": 11.5296, |
|
"eval_gen_len": 86.0, |
|
"eval_loss": 4.315915584564209, |
|
"eval_runtime": 13.3622, |
|
"eval_samples_per_second": 0.225, |
|
"eval_steps_per_second": 0.075, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_bleu": 11.1681, |
|
"eval_gen_len": 78.0, |
|
"eval_loss": 4.319802284240723, |
|
"eval_runtime": 13.5231, |
|
"eval_samples_per_second": 0.222, |
|
"eval_steps_per_second": 0.074, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_bleu": 10.2925, |
|
"eval_gen_len": 82.6667, |
|
"eval_loss": 4.323220252990723, |
|
"eval_runtime": 13.3207, |
|
"eval_samples_per_second": 0.225, |
|
"eval_steps_per_second": 0.075, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_bleu": 11.2059, |
|
"eval_gen_len": 82.3333, |
|
"eval_loss": 4.325761795043945, |
|
"eval_runtime": 16.8934, |
|
"eval_samples_per_second": 0.178, |
|
"eval_steps_per_second": 0.059, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_bleu": 11.1664, |
|
"eval_gen_len": 85.3333, |
|
"eval_loss": 4.327906131744385, |
|
"eval_runtime": 14.4307, |
|
"eval_samples_per_second": 0.208, |
|
"eval_steps_per_second": 0.069, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_bleu": 11.2609, |
|
"eval_gen_len": 81.6667, |
|
"eval_loss": 4.329981803894043, |
|
"eval_runtime": 14.847, |
|
"eval_samples_per_second": 0.202, |
|
"eval_steps_per_second": 0.067, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_bleu": 11.2609, |
|
"eval_gen_len": 81.6667, |
|
"eval_loss": 4.332195281982422, |
|
"eval_runtime": 15.2747, |
|
"eval_samples_per_second": 0.196, |
|
"eval_steps_per_second": 0.065, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_bleu": 11.0203, |
|
"eval_gen_len": 90.3333, |
|
"eval_loss": 4.3332905769348145, |
|
"eval_runtime": 15.7496, |
|
"eval_samples_per_second": 0.19, |
|
"eval_steps_per_second": 0.063, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_bleu": 11.1664, |
|
"eval_gen_len": 85.3333, |
|
"eval_loss": 4.3369364738464355, |
|
"eval_runtime": 14.8004, |
|
"eval_samples_per_second": 0.203, |
|
"eval_steps_per_second": 0.068, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_bleu": 11.3295, |
|
"eval_gen_len": 82.0, |
|
"eval_loss": 4.340444564819336, |
|
"eval_runtime": 13.1824, |
|
"eval_samples_per_second": 0.228, |
|
"eval_steps_per_second": 0.076, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_bleu": 11.1364, |
|
"eval_gen_len": 68.3333, |
|
"eval_loss": 4.343842506408691, |
|
"eval_runtime": 12.8795, |
|
"eval_samples_per_second": 0.233, |
|
"eval_steps_per_second": 0.078, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_bleu": 11.1364, |
|
"eval_gen_len": 68.3333, |
|
"eval_loss": 4.347080230712891, |
|
"eval_runtime": 13.1248, |
|
"eval_samples_per_second": 0.229, |
|
"eval_steps_per_second": 0.076, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_bleu": 11.1208, |
|
"eval_gen_len": 65.6667, |
|
"eval_loss": 4.349867820739746, |
|
"eval_runtime": 11.6147, |
|
"eval_samples_per_second": 0.258, |
|
"eval_steps_per_second": 0.086, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_bleu": 11.1208, |
|
"eval_gen_len": 65.6667, |
|
"eval_loss": 4.352280616760254, |
|
"eval_runtime": 11.3187, |
|
"eval_samples_per_second": 0.265, |
|
"eval_steps_per_second": 0.088, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_bleu": 11.1208, |
|
"eval_gen_len": 65.6667, |
|
"eval_loss": 4.353754997253418, |
|
"eval_runtime": 12.1023, |
|
"eval_samples_per_second": 0.248, |
|
"eval_steps_per_second": 0.083, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_bleu": 10.9215, |
|
"eval_gen_len": 66.0, |
|
"eval_loss": 4.355079174041748, |
|
"eval_runtime": 10.6524, |
|
"eval_samples_per_second": 0.282, |
|
"eval_steps_per_second": 0.094, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_bleu": 11.0309, |
|
"eval_gen_len": 72.0, |
|
"eval_loss": 4.356247901916504, |
|
"eval_runtime": 14.1128, |
|
"eval_samples_per_second": 0.213, |
|
"eval_steps_per_second": 0.071, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_bleu": 11.0309, |
|
"eval_gen_len": 72.0, |
|
"eval_loss": 4.357237815856934, |
|
"eval_runtime": 14.0892, |
|
"eval_samples_per_second": 0.213, |
|
"eval_steps_per_second": 0.071, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_bleu": 10.678, |
|
"eval_gen_len": 71.3333, |
|
"eval_loss": 4.358083248138428, |
|
"eval_runtime": 12.7806, |
|
"eval_samples_per_second": 0.235, |
|
"eval_steps_per_second": 0.078, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_bleu": 10.678, |
|
"eval_gen_len": 71.3333, |
|
"eval_loss": 4.358962059020996, |
|
"eval_runtime": 12.8538, |
|
"eval_samples_per_second": 0.233, |
|
"eval_steps_per_second": 0.078, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_bleu": 10.678, |
|
"eval_gen_len": 71.3333, |
|
"eval_loss": 4.359888076782227, |
|
"eval_runtime": 12.3355, |
|
"eval_samples_per_second": 0.243, |
|
"eval_steps_per_second": 0.081, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_bleu": 10.678, |
|
"eval_gen_len": 71.3333, |
|
"eval_loss": 4.360787391662598, |
|
"eval_runtime": 12.7009, |
|
"eval_samples_per_second": 0.236, |
|
"eval_steps_per_second": 0.079, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_bleu": 10.4416, |
|
"eval_gen_len": 86.3333, |
|
"eval_loss": 4.36198091506958, |
|
"eval_runtime": 12.4279, |
|
"eval_samples_per_second": 0.241, |
|
"eval_steps_per_second": 0.08, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_bleu": 10.678, |
|
"eval_gen_len": 71.3333, |
|
"eval_loss": 4.362994194030762, |
|
"eval_runtime": 12.6907, |
|
"eval_samples_per_second": 0.236, |
|
"eval_steps_per_second": 0.079, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_bleu": 10.678, |
|
"eval_gen_len": 71.3333, |
|
"eval_loss": 4.366813659667969, |
|
"eval_runtime": 12.6482, |
|
"eval_samples_per_second": 0.237, |
|
"eval_steps_per_second": 0.079, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_bleu": 11.1601, |
|
"eval_gen_len": 74.6667, |
|
"eval_loss": 4.370214462280273, |
|
"eval_runtime": 11.7414, |
|
"eval_samples_per_second": 0.256, |
|
"eval_steps_per_second": 0.085, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_bleu": 11.137, |
|
"eval_gen_len": 78.0, |
|
"eval_loss": 4.37328577041626, |
|
"eval_runtime": 13.9564, |
|
"eval_samples_per_second": 0.215, |
|
"eval_steps_per_second": 0.072, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_bleu": 11.137, |
|
"eval_gen_len": 78.0, |
|
"eval_loss": 4.3749165534973145, |
|
"eval_runtime": 13.8294, |
|
"eval_samples_per_second": 0.217, |
|
"eval_steps_per_second": 0.072, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_bleu": 11.137, |
|
"eval_gen_len": 78.0, |
|
"eval_loss": 4.3767194747924805, |
|
"eval_runtime": 13.7066, |
|
"eval_samples_per_second": 0.219, |
|
"eval_steps_per_second": 0.073, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_bleu": 10.8916, |
|
"eval_gen_len": 88.0, |
|
"eval_loss": 4.3782854080200195, |
|
"eval_runtime": 16.3165, |
|
"eval_samples_per_second": 0.184, |
|
"eval_steps_per_second": 0.061, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_bleu": 10.7733, |
|
"eval_gen_len": 67.3333, |
|
"eval_loss": 4.379751682281494, |
|
"eval_runtime": 11.7272, |
|
"eval_samples_per_second": 0.256, |
|
"eval_steps_per_second": 0.085, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_bleu": 11.5509, |
|
"eval_gen_len": 66.3333, |
|
"eval_loss": 4.381042957305908, |
|
"eval_runtime": 11.8594, |
|
"eval_samples_per_second": 0.253, |
|
"eval_steps_per_second": 0.084, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_bleu": 11.4262, |
|
"eval_gen_len": 70.0, |
|
"eval_loss": 4.382328510284424, |
|
"eval_runtime": 13.0297, |
|
"eval_samples_per_second": 0.23, |
|
"eval_steps_per_second": 0.077, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_bleu": 11.4262, |
|
"eval_gen_len": 70.0, |
|
"eval_loss": 4.383648872375488, |
|
"eval_runtime": 14.0598, |
|
"eval_samples_per_second": 0.213, |
|
"eval_steps_per_second": 0.071, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_bleu": 11.195, |
|
"eval_gen_len": 66.3333, |
|
"eval_loss": 4.384763717651367, |
|
"eval_runtime": 11.4527, |
|
"eval_samples_per_second": 0.262, |
|
"eval_steps_per_second": 0.087, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_bleu": 11.1115, |
|
"eval_gen_len": 67.0, |
|
"eval_loss": 4.384941577911377, |
|
"eval_runtime": 12.3777, |
|
"eval_samples_per_second": 0.242, |
|
"eval_steps_per_second": 0.081, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_bleu": 11.2741, |
|
"eval_gen_len": 67.6667, |
|
"eval_loss": 4.385134696960449, |
|
"eval_runtime": 12.3325, |
|
"eval_samples_per_second": 0.243, |
|
"eval_steps_per_second": 0.081, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_bleu": 11.6137, |
|
"eval_gen_len": 71.0, |
|
"eval_loss": 4.38496208190918, |
|
"eval_runtime": 12.6988, |
|
"eval_samples_per_second": 0.236, |
|
"eval_steps_per_second": 0.079, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_bleu": 11.2741, |
|
"eval_gen_len": 67.6667, |
|
"eval_loss": 4.3848419189453125, |
|
"eval_runtime": 11.4524, |
|
"eval_samples_per_second": 0.262, |
|
"eval_steps_per_second": 0.087, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_bleu": 11.1454, |
|
"eval_gen_len": 62.0, |
|
"eval_loss": 4.38457727432251, |
|
"eval_runtime": 10.4375, |
|
"eval_samples_per_second": 0.287, |
|
"eval_steps_per_second": 0.096, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_bleu": 11.1454, |
|
"eval_gen_len": 62.0, |
|
"eval_loss": 4.3842973709106445, |
|
"eval_runtime": 10.4146, |
|
"eval_samples_per_second": 0.288, |
|
"eval_steps_per_second": 0.096, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_bleu": 11.267, |
|
"eval_gen_len": 68.6667, |
|
"eval_loss": 4.383995532989502, |
|
"eval_runtime": 12.6896, |
|
"eval_samples_per_second": 0.236, |
|
"eval_steps_per_second": 0.079, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_bleu": 11.267, |
|
"eval_gen_len": 68.6667, |
|
"eval_loss": 4.383782386779785, |
|
"eval_runtime": 12.4982, |
|
"eval_samples_per_second": 0.24, |
|
"eval_steps_per_second": 0.08, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_bleu": 11.267, |
|
"eval_gen_len": 68.6667, |
|
"eval_loss": 4.383605480194092, |
|
"eval_runtime": 12.2559, |
|
"eval_samples_per_second": 0.245, |
|
"eval_steps_per_second": 0.082, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_bleu": 11.267, |
|
"eval_gen_len": 68.6667, |
|
"eval_loss": 4.383455753326416, |
|
"eval_runtime": 12.7951, |
|
"eval_samples_per_second": 0.234, |
|
"eval_steps_per_second": 0.078, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_bleu": 11.267, |
|
"eval_gen_len": 68.6667, |
|
"eval_loss": 4.38338565826416, |
|
"eval_runtime": 12.5193, |
|
"eval_samples_per_second": 0.24, |
|
"eval_steps_per_second": 0.08, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_bleu": 11.4092, |
|
"eval_gen_len": 69.0, |
|
"eval_loss": 4.383331298828125, |
|
"eval_runtime": 12.8027, |
|
"eval_samples_per_second": 0.234, |
|
"eval_steps_per_second": 0.078, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_bleu": 11.4092, |
|
"eval_gen_len": 69.0, |
|
"eval_loss": 4.38341760635376, |
|
"eval_runtime": 12.849, |
|
"eval_samples_per_second": 0.233, |
|
"eval_steps_per_second": 0.078, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_bleu": 11.4092, |
|
"eval_gen_len": 69.0, |
|
"eval_loss": 4.383518695831299, |
|
"eval_runtime": 12.3575, |
|
"eval_samples_per_second": 0.243, |
|
"eval_steps_per_second": 0.081, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_bleu": 11.4092, |
|
"eval_gen_len": 69.0, |
|
"eval_loss": 4.3836445808410645, |
|
"eval_runtime": 12.8174, |
|
"eval_samples_per_second": 0.234, |
|
"eval_steps_per_second": 0.078, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_bleu": 11.4092, |
|
"eval_gen_len": 69.0, |
|
"eval_loss": 4.383749961853027, |
|
"eval_runtime": 12.8695, |
|
"eval_samples_per_second": 0.233, |
|
"eval_steps_per_second": 0.078, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_bleu": 11.4092, |
|
"eval_gen_len": 69.0, |
|
"eval_loss": 4.383831977844238, |
|
"eval_runtime": 12.8527, |
|
"eval_samples_per_second": 0.233, |
|
"eval_steps_per_second": 0.078, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_bleu": 11.4092, |
|
"eval_gen_len": 69.0, |
|
"eval_loss": 4.383894920349121, |
|
"eval_runtime": 12.7272, |
|
"eval_samples_per_second": 0.236, |
|
"eval_steps_per_second": 0.079, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_bleu": 11.4092, |
|
"eval_gen_len": 69.0, |
|
"eval_loss": 4.383920669555664, |
|
"eval_runtime": 14.3034, |
|
"eval_samples_per_second": 0.21, |
|
"eval_steps_per_second": 0.07, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_bleu": 11.4092, |
|
"eval_gen_len": 69.0, |
|
"eval_loss": 4.38393497467041, |
|
"eval_runtime": 12.4454, |
|
"eval_samples_per_second": 0.241, |
|
"eval_steps_per_second": 0.08, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_bleu": 11.4092, |
|
"eval_gen_len": 69.0, |
|
"eval_loss": 4.384005546569824, |
|
"eval_runtime": 12.8192, |
|
"eval_samples_per_second": 0.234, |
|
"eval_steps_per_second": 0.078, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_bleu": 11.4092, |
|
"eval_gen_len": 69.0, |
|
"eval_loss": 4.3840460777282715, |
|
"eval_runtime": 12.4402, |
|
"eval_samples_per_second": 0.241, |
|
"eval_steps_per_second": 0.08, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_bleu": 10.7891, |
|
"eval_gen_len": 70.6667, |
|
"eval_loss": 4.384109020233154, |
|
"eval_runtime": 12.8195, |
|
"eval_samples_per_second": 0.234, |
|
"eval_steps_per_second": 0.078, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_bleu": 10.7891, |
|
"eval_gen_len": 70.6667, |
|
"eval_loss": 4.384162902832031, |
|
"eval_runtime": 12.3694, |
|
"eval_samples_per_second": 0.243, |
|
"eval_steps_per_second": 0.081, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_bleu": 10.7891, |
|
"eval_gen_len": 70.6667, |
|
"eval_loss": 4.384200096130371, |
|
"eval_runtime": 12.8132, |
|
"eval_samples_per_second": 0.234, |
|
"eval_steps_per_second": 0.078, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_bleu": 10.7891, |
|
"eval_gen_len": 70.6667, |
|
"eval_loss": 4.384230613708496, |
|
"eval_runtime": 12.6103, |
|
"eval_samples_per_second": 0.238, |
|
"eval_steps_per_second": 0.079, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_bleu": 10.7891, |
|
"eval_gen_len": 70.6667, |
|
"eval_loss": 4.384246349334717, |
|
"eval_runtime": 12.8394, |
|
"eval_samples_per_second": 0.234, |
|
"eval_steps_per_second": 0.078, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7786030694400.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|