{ "best_metric": 3.4803478717803955, "best_model_checkpoint": "opus-mt-zh-en-finetuned-zh-to-en/checkpoint-2", "epoch": 100.0, "eval_steps": 500, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 5.6918, "eval_gen_len": 57.3333, "eval_loss": 3.5217063426971436, "eval_runtime": 10.9763, "eval_samples_per_second": 0.273, "eval_steps_per_second": 0.091, "step": 1 }, { "epoch": 2.0, "eval_bleu": 7.6304, "eval_gen_len": 55.3333, "eval_loss": 3.4803478717803955, "eval_runtime": 8.3555, "eval_samples_per_second": 0.359, "eval_steps_per_second": 0.12, "step": 2 }, { "epoch": 3.0, "eval_bleu": 7.9453, "eval_gen_len": 56.0, "eval_loss": 3.491027593612671, "eval_runtime": 9.8597, "eval_samples_per_second": 0.304, "eval_steps_per_second": 0.101, "step": 3 }, { "epoch": 4.0, "eval_bleu": 6.5495, "eval_gen_len": 79.0, "eval_loss": 3.5166053771972656, "eval_runtime": 13.007, "eval_samples_per_second": 0.231, "eval_steps_per_second": 0.077, "step": 4 }, { "epoch": 5.0, "eval_bleu": 6.6932, "eval_gen_len": 80.0, "eval_loss": 3.5517735481262207, "eval_runtime": 14.9524, "eval_samples_per_second": 0.201, "eval_steps_per_second": 0.067, "step": 5 }, { "epoch": 6.0, "eval_bleu": 5.9333, "eval_gen_len": 137.0, "eval_loss": 3.5805959701538086, "eval_runtime": 61.8901, "eval_samples_per_second": 0.048, "eval_steps_per_second": 0.016, "step": 6 }, { "epoch": 7.0, "eval_bleu": 5.6605, "eval_gen_len": 133.6667, "eval_loss": 3.6102888584136963, "eval_runtime": 55.757, "eval_samples_per_second": 0.054, "eval_steps_per_second": 0.018, "step": 7 }, { "epoch": 8.0, "eval_bleu": 7.1686, "eval_gen_len": 135.3333, "eval_loss": 3.645458936691284, "eval_runtime": 52.3901, "eval_samples_per_second": 0.057, "eval_steps_per_second": 0.019, "step": 8 }, { "epoch": 9.0, "eval_bleu": 9.8037, "eval_gen_len": 146.0, "eval_loss": 3.6737775802612305, "eval_runtime": 50.7208, "eval_samples_per_second": 0.059, "eval_steps_per_second": 0.02, "step": 9 }, { "epoch": 10.0, "eval_bleu": 9.9552, "eval_gen_len": 139.0, "eval_loss": 3.707125186920166, "eval_runtime": 49.5408, "eval_samples_per_second": 0.061, "eval_steps_per_second": 0.02, "step": 10 }, { "epoch": 11.0, "eval_bleu": 10.311, "eval_gen_len": 134.0, "eval_loss": 3.7424490451812744, "eval_runtime": 45.4229, "eval_samples_per_second": 0.066, "eval_steps_per_second": 0.022, "step": 11 }, { "epoch": 12.0, "eval_bleu": 10.1929, "eval_gen_len": 131.6667, "eval_loss": 3.786301612854004, "eval_runtime": 45.6583, "eval_samples_per_second": 0.066, "eval_steps_per_second": 0.022, "step": 12 }, { "epoch": 13.0, "eval_bleu": 9.8671, "eval_gen_len": 119.3333, "eval_loss": 3.825416088104248, "eval_runtime": 44.3396, "eval_samples_per_second": 0.068, "eval_steps_per_second": 0.023, "step": 13 }, { "epoch": 14.0, "eval_bleu": 11.4259, "eval_gen_len": 107.3333, "eval_loss": 3.866556167602539, "eval_runtime": 60.118, "eval_samples_per_second": 0.05, "eval_steps_per_second": 0.017, "step": 14 }, { "epoch": 15.0, "eval_bleu": 11.4172, "eval_gen_len": 100.6667, "eval_loss": 3.900843620300293, "eval_runtime": 56.7929, "eval_samples_per_second": 0.053, "eval_steps_per_second": 0.018, "step": 15 }, { "epoch": 16.0, "eval_bleu": 10.8738, "eval_gen_len": 117.6667, "eval_loss": 3.935436964035034, "eval_runtime": 38.5973, "eval_samples_per_second": 0.078, "eval_steps_per_second": 0.026, "step": 16 }, { "epoch": 17.0, "eval_bleu": 10.7717, "eval_gen_len": 117.0, "eval_loss": 3.9768829345703125, "eval_runtime": 36.4507, "eval_samples_per_second": 0.082, "eval_steps_per_second": 0.027, "step": 17 }, { "epoch": 18.0, "eval_bleu": 9.9034, "eval_gen_len": 118.6667, "eval_loss": 4.017246246337891, "eval_runtime": 36.6426, "eval_samples_per_second": 0.082, "eval_steps_per_second": 0.027, "step": 18 }, { "epoch": 19.0, "eval_bleu": 10.0376, "eval_gen_len": 121.6667, "eval_loss": 4.05275821685791, "eval_runtime": 34.9881, "eval_samples_per_second": 0.086, "eval_steps_per_second": 0.029, "step": 19 }, { "epoch": 20.0, "eval_bleu": 8.6054, "eval_gen_len": 61.0, "eval_loss": 4.080726623535156, "eval_runtime": 9.9523, "eval_samples_per_second": 0.301, "eval_steps_per_second": 0.1, "step": 20 }, { "epoch": 21.0, "eval_bleu": 10.9496, "eval_gen_len": 67.0, "eval_loss": 4.109140396118164, "eval_runtime": 11.0517, "eval_samples_per_second": 0.271, "eval_steps_per_second": 0.09, "step": 21 }, { "epoch": 22.0, "eval_bleu": 10.8268, "eval_gen_len": 65.6667, "eval_loss": 4.133188247680664, "eval_runtime": 10.5163, "eval_samples_per_second": 0.285, "eval_steps_per_second": 0.095, "step": 22 }, { "epoch": 23.0, "eval_bleu": 10.5556, "eval_gen_len": 63.6667, "eval_loss": 4.158883571624756, "eval_runtime": 9.4741, "eval_samples_per_second": 0.317, "eval_steps_per_second": 0.106, "step": 23 }, { "epoch": 24.0, "eval_bleu": 10.8678, "eval_gen_len": 62.0, "eval_loss": 4.181970119476318, "eval_runtime": 10.5606, "eval_samples_per_second": 0.284, "eval_steps_per_second": 0.095, "step": 24 }, { "epoch": 25.0, "eval_bleu": 10.8678, "eval_gen_len": 61.6667, "eval_loss": 4.196631908416748, "eval_runtime": 9.7369, "eval_samples_per_second": 0.308, "eval_steps_per_second": 0.103, "step": 25 }, { "epoch": 26.0, "eval_bleu": 10.27, "eval_gen_len": 61.6667, "eval_loss": 4.209479808807373, "eval_runtime": 9.9007, "eval_samples_per_second": 0.303, "eval_steps_per_second": 0.101, "step": 26 }, { "epoch": 27.0, "eval_bleu": 10.6577, "eval_gen_len": 60.0, "eval_loss": 4.220566272735596, "eval_runtime": 9.9798, "eval_samples_per_second": 0.301, "eval_steps_per_second": 0.1, "step": 27 }, { "epoch": 28.0, "eval_bleu": 10.2137, "eval_gen_len": 60.6667, "eval_loss": 4.2330474853515625, "eval_runtime": 11.0027, "eval_samples_per_second": 0.273, "eval_steps_per_second": 0.091, "step": 28 }, { "epoch": 29.0, "eval_bleu": 10.0108, "eval_gen_len": 62.0, "eval_loss": 4.245401382446289, "eval_runtime": 10.7321, "eval_samples_per_second": 0.28, "eval_steps_per_second": 0.093, "step": 29 }, { "epoch": 30.0, "eval_bleu": 10.6984, "eval_gen_len": 60.0, "eval_loss": 4.257299423217773, "eval_runtime": 9.8765, "eval_samples_per_second": 0.304, "eval_steps_per_second": 0.101, "step": 30 }, { "epoch": 31.0, "eval_bleu": 9.6494, "eval_gen_len": 83.0, "eval_loss": 4.2664995193481445, "eval_runtime": 13.2171, "eval_samples_per_second": 0.227, "eval_steps_per_second": 0.076, "step": 31 }, { "epoch": 32.0, "eval_bleu": 9.7503, "eval_gen_len": 84.3333, "eval_loss": 4.276342868804932, "eval_runtime": 12.9373, "eval_samples_per_second": 0.232, "eval_steps_per_second": 0.077, "step": 32 }, { "epoch": 33.0, "eval_bleu": 9.7503, "eval_gen_len": 84.3333, "eval_loss": 4.287381649017334, "eval_runtime": 12.9473, "eval_samples_per_second": 0.232, "eval_steps_per_second": 0.077, "step": 33 }, { "epoch": 34.0, "eval_bleu": 9.7503, "eval_gen_len": 84.3333, "eval_loss": 4.295342445373535, "eval_runtime": 13.0533, "eval_samples_per_second": 0.23, "eval_steps_per_second": 0.077, "step": 34 }, { "epoch": 35.0, "eval_bleu": 9.7503, "eval_gen_len": 84.3333, "eval_loss": 4.301455020904541, "eval_runtime": 13.2102, "eval_samples_per_second": 0.227, "eval_steps_per_second": 0.076, "step": 35 }, { "epoch": 36.0, "eval_bleu": 9.4923, "eval_gen_len": 70.0, "eval_loss": 4.303869247436523, "eval_runtime": 105.6574, "eval_samples_per_second": 0.028, "eval_steps_per_second": 0.009, "step": 36 }, { "epoch": 37.0, "eval_bleu": 9.7956, "eval_gen_len": 64.6667, "eval_loss": 4.308076858520508, "eval_runtime": 86.8335, "eval_samples_per_second": 0.035, "eval_steps_per_second": 0.012, "step": 37 }, { "epoch": 38.0, "eval_bleu": 10.3592, "eval_gen_len": 70.6667, "eval_loss": 4.311928749084473, "eval_runtime": 10.4025, "eval_samples_per_second": 0.288, "eval_steps_per_second": 0.096, "step": 38 }, { "epoch": 39.0, "eval_bleu": 11.5296, "eval_gen_len": 86.0, "eval_loss": 4.315915584564209, "eval_runtime": 13.3622, "eval_samples_per_second": 0.225, "eval_steps_per_second": 0.075, "step": 39 }, { "epoch": 40.0, "eval_bleu": 11.1681, "eval_gen_len": 78.0, "eval_loss": 4.319802284240723, "eval_runtime": 13.5231, "eval_samples_per_second": 0.222, "eval_steps_per_second": 0.074, "step": 40 }, { "epoch": 41.0, "eval_bleu": 10.2925, "eval_gen_len": 82.6667, "eval_loss": 4.323220252990723, "eval_runtime": 13.3207, "eval_samples_per_second": 0.225, "eval_steps_per_second": 0.075, "step": 41 }, { "epoch": 42.0, "eval_bleu": 11.2059, "eval_gen_len": 82.3333, "eval_loss": 4.325761795043945, "eval_runtime": 16.8934, "eval_samples_per_second": 0.178, "eval_steps_per_second": 0.059, "step": 42 }, { "epoch": 43.0, "eval_bleu": 11.1664, "eval_gen_len": 85.3333, "eval_loss": 4.327906131744385, "eval_runtime": 14.4307, "eval_samples_per_second": 0.208, "eval_steps_per_second": 0.069, "step": 43 }, { "epoch": 44.0, "eval_bleu": 11.2609, "eval_gen_len": 81.6667, "eval_loss": 4.329981803894043, "eval_runtime": 14.847, "eval_samples_per_second": 0.202, "eval_steps_per_second": 0.067, "step": 44 }, { "epoch": 45.0, "eval_bleu": 11.2609, "eval_gen_len": 81.6667, "eval_loss": 4.332195281982422, "eval_runtime": 15.2747, "eval_samples_per_second": 0.196, "eval_steps_per_second": 0.065, "step": 45 }, { "epoch": 46.0, "eval_bleu": 11.0203, "eval_gen_len": 90.3333, "eval_loss": 4.3332905769348145, "eval_runtime": 15.7496, "eval_samples_per_second": 0.19, "eval_steps_per_second": 0.063, "step": 46 }, { "epoch": 47.0, "eval_bleu": 11.1664, "eval_gen_len": 85.3333, "eval_loss": 4.3369364738464355, "eval_runtime": 14.8004, "eval_samples_per_second": 0.203, "eval_steps_per_second": 0.068, "step": 47 }, { "epoch": 48.0, "eval_bleu": 11.3295, "eval_gen_len": 82.0, "eval_loss": 4.340444564819336, "eval_runtime": 13.1824, "eval_samples_per_second": 0.228, "eval_steps_per_second": 0.076, "step": 48 }, { "epoch": 49.0, "eval_bleu": 11.1364, "eval_gen_len": 68.3333, "eval_loss": 4.343842506408691, "eval_runtime": 12.8795, "eval_samples_per_second": 0.233, "eval_steps_per_second": 0.078, "step": 49 }, { "epoch": 50.0, "eval_bleu": 11.1364, "eval_gen_len": 68.3333, "eval_loss": 4.347080230712891, "eval_runtime": 13.1248, "eval_samples_per_second": 0.229, "eval_steps_per_second": 0.076, "step": 50 }, { "epoch": 51.0, "eval_bleu": 11.1208, "eval_gen_len": 65.6667, "eval_loss": 4.349867820739746, "eval_runtime": 11.6147, "eval_samples_per_second": 0.258, "eval_steps_per_second": 0.086, "step": 51 }, { "epoch": 52.0, "eval_bleu": 11.1208, "eval_gen_len": 65.6667, "eval_loss": 4.352280616760254, "eval_runtime": 11.3187, "eval_samples_per_second": 0.265, "eval_steps_per_second": 0.088, "step": 52 }, { "epoch": 53.0, "eval_bleu": 11.1208, "eval_gen_len": 65.6667, "eval_loss": 4.353754997253418, "eval_runtime": 12.1023, "eval_samples_per_second": 0.248, "eval_steps_per_second": 0.083, "step": 53 }, { "epoch": 54.0, "eval_bleu": 10.9215, "eval_gen_len": 66.0, "eval_loss": 4.355079174041748, "eval_runtime": 10.6524, "eval_samples_per_second": 0.282, "eval_steps_per_second": 0.094, "step": 54 }, { "epoch": 55.0, "eval_bleu": 11.0309, "eval_gen_len": 72.0, "eval_loss": 4.356247901916504, "eval_runtime": 14.1128, "eval_samples_per_second": 0.213, "eval_steps_per_second": 0.071, "step": 55 }, { "epoch": 56.0, "eval_bleu": 11.0309, "eval_gen_len": 72.0, "eval_loss": 4.357237815856934, "eval_runtime": 14.0892, "eval_samples_per_second": 0.213, "eval_steps_per_second": 0.071, "step": 56 }, { "epoch": 57.0, "eval_bleu": 10.678, "eval_gen_len": 71.3333, "eval_loss": 4.358083248138428, "eval_runtime": 12.7806, "eval_samples_per_second": 0.235, "eval_steps_per_second": 0.078, "step": 57 }, { "epoch": 58.0, "eval_bleu": 10.678, "eval_gen_len": 71.3333, "eval_loss": 4.358962059020996, "eval_runtime": 12.8538, "eval_samples_per_second": 0.233, "eval_steps_per_second": 0.078, "step": 58 }, { "epoch": 59.0, "eval_bleu": 10.678, "eval_gen_len": 71.3333, "eval_loss": 4.359888076782227, "eval_runtime": 12.3355, "eval_samples_per_second": 0.243, "eval_steps_per_second": 0.081, "step": 59 }, { "epoch": 60.0, "eval_bleu": 10.678, "eval_gen_len": 71.3333, "eval_loss": 4.360787391662598, "eval_runtime": 12.7009, "eval_samples_per_second": 0.236, "eval_steps_per_second": 0.079, "step": 60 }, { "epoch": 61.0, "eval_bleu": 10.4416, "eval_gen_len": 86.3333, "eval_loss": 4.36198091506958, "eval_runtime": 12.4279, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.08, "step": 61 }, { "epoch": 62.0, "eval_bleu": 10.678, "eval_gen_len": 71.3333, "eval_loss": 4.362994194030762, "eval_runtime": 12.6907, "eval_samples_per_second": 0.236, "eval_steps_per_second": 0.079, "step": 62 }, { "epoch": 63.0, "eval_bleu": 10.678, "eval_gen_len": 71.3333, "eval_loss": 4.366813659667969, "eval_runtime": 12.6482, "eval_samples_per_second": 0.237, "eval_steps_per_second": 0.079, "step": 63 }, { "epoch": 64.0, "eval_bleu": 11.1601, "eval_gen_len": 74.6667, "eval_loss": 4.370214462280273, "eval_runtime": 11.7414, "eval_samples_per_second": 0.256, "eval_steps_per_second": 0.085, "step": 64 }, { "epoch": 65.0, "eval_bleu": 11.137, "eval_gen_len": 78.0, "eval_loss": 4.37328577041626, "eval_runtime": 13.9564, "eval_samples_per_second": 0.215, "eval_steps_per_second": 0.072, "step": 65 }, { "epoch": 66.0, "eval_bleu": 11.137, "eval_gen_len": 78.0, "eval_loss": 4.3749165534973145, "eval_runtime": 13.8294, "eval_samples_per_second": 0.217, "eval_steps_per_second": 0.072, "step": 66 }, { "epoch": 67.0, "eval_bleu": 11.137, "eval_gen_len": 78.0, "eval_loss": 4.3767194747924805, "eval_runtime": 13.7066, "eval_samples_per_second": 0.219, "eval_steps_per_second": 0.073, "step": 67 }, { "epoch": 68.0, "eval_bleu": 10.8916, "eval_gen_len": 88.0, "eval_loss": 4.3782854080200195, "eval_runtime": 16.3165, "eval_samples_per_second": 0.184, "eval_steps_per_second": 0.061, "step": 68 }, { "epoch": 69.0, "eval_bleu": 10.7733, "eval_gen_len": 67.3333, "eval_loss": 4.379751682281494, "eval_runtime": 11.7272, "eval_samples_per_second": 0.256, "eval_steps_per_second": 0.085, "step": 69 }, { "epoch": 70.0, "eval_bleu": 11.5509, "eval_gen_len": 66.3333, "eval_loss": 4.381042957305908, "eval_runtime": 11.8594, "eval_samples_per_second": 0.253, "eval_steps_per_second": 0.084, "step": 70 }, { "epoch": 71.0, "eval_bleu": 11.4262, "eval_gen_len": 70.0, "eval_loss": 4.382328510284424, "eval_runtime": 13.0297, "eval_samples_per_second": 0.23, "eval_steps_per_second": 0.077, "step": 71 }, { "epoch": 72.0, "eval_bleu": 11.4262, "eval_gen_len": 70.0, "eval_loss": 4.383648872375488, "eval_runtime": 14.0598, "eval_samples_per_second": 0.213, "eval_steps_per_second": 0.071, "step": 72 }, { "epoch": 73.0, "eval_bleu": 11.195, "eval_gen_len": 66.3333, "eval_loss": 4.384763717651367, "eval_runtime": 11.4527, "eval_samples_per_second": 0.262, "eval_steps_per_second": 0.087, "step": 73 }, { "epoch": 74.0, "eval_bleu": 11.1115, "eval_gen_len": 67.0, "eval_loss": 4.384941577911377, "eval_runtime": 12.3777, "eval_samples_per_second": 0.242, "eval_steps_per_second": 0.081, "step": 74 }, { "epoch": 75.0, "eval_bleu": 11.2741, "eval_gen_len": 67.6667, "eval_loss": 4.385134696960449, "eval_runtime": 12.3325, "eval_samples_per_second": 0.243, "eval_steps_per_second": 0.081, "step": 75 }, { "epoch": 76.0, "eval_bleu": 11.6137, "eval_gen_len": 71.0, "eval_loss": 4.38496208190918, "eval_runtime": 12.6988, "eval_samples_per_second": 0.236, "eval_steps_per_second": 0.079, "step": 76 }, { "epoch": 77.0, "eval_bleu": 11.2741, "eval_gen_len": 67.6667, "eval_loss": 4.3848419189453125, "eval_runtime": 11.4524, "eval_samples_per_second": 0.262, "eval_steps_per_second": 0.087, "step": 77 }, { "epoch": 78.0, "eval_bleu": 11.1454, "eval_gen_len": 62.0, "eval_loss": 4.38457727432251, "eval_runtime": 10.4375, "eval_samples_per_second": 0.287, "eval_steps_per_second": 0.096, "step": 78 }, { "epoch": 79.0, "eval_bleu": 11.1454, "eval_gen_len": 62.0, "eval_loss": 4.3842973709106445, "eval_runtime": 10.4146, "eval_samples_per_second": 0.288, "eval_steps_per_second": 0.096, "step": 79 }, { "epoch": 80.0, "eval_bleu": 11.267, "eval_gen_len": 68.6667, "eval_loss": 4.383995532989502, "eval_runtime": 12.6896, "eval_samples_per_second": 0.236, "eval_steps_per_second": 0.079, "step": 80 }, { "epoch": 81.0, "eval_bleu": 11.267, "eval_gen_len": 68.6667, "eval_loss": 4.383782386779785, "eval_runtime": 12.4982, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.08, "step": 81 }, { "epoch": 82.0, "eval_bleu": 11.267, "eval_gen_len": 68.6667, "eval_loss": 4.383605480194092, "eval_runtime": 12.2559, "eval_samples_per_second": 0.245, "eval_steps_per_second": 0.082, "step": 82 }, { "epoch": 83.0, "eval_bleu": 11.267, "eval_gen_len": 68.6667, "eval_loss": 4.383455753326416, "eval_runtime": 12.7951, "eval_samples_per_second": 0.234, "eval_steps_per_second": 0.078, "step": 83 }, { "epoch": 84.0, "eval_bleu": 11.267, "eval_gen_len": 68.6667, "eval_loss": 4.38338565826416, "eval_runtime": 12.5193, "eval_samples_per_second": 0.24, "eval_steps_per_second": 0.08, "step": 84 }, { "epoch": 85.0, "eval_bleu": 11.4092, "eval_gen_len": 69.0, "eval_loss": 4.383331298828125, "eval_runtime": 12.8027, "eval_samples_per_second": 0.234, "eval_steps_per_second": 0.078, "step": 85 }, { "epoch": 86.0, "eval_bleu": 11.4092, "eval_gen_len": 69.0, "eval_loss": 4.38341760635376, "eval_runtime": 12.849, "eval_samples_per_second": 0.233, "eval_steps_per_second": 0.078, "step": 86 }, { "epoch": 87.0, "eval_bleu": 11.4092, "eval_gen_len": 69.0, "eval_loss": 4.383518695831299, "eval_runtime": 12.3575, "eval_samples_per_second": 0.243, "eval_steps_per_second": 0.081, "step": 87 }, { "epoch": 88.0, "eval_bleu": 11.4092, "eval_gen_len": 69.0, "eval_loss": 4.3836445808410645, "eval_runtime": 12.8174, "eval_samples_per_second": 0.234, "eval_steps_per_second": 0.078, "step": 88 }, { "epoch": 89.0, "eval_bleu": 11.4092, "eval_gen_len": 69.0, "eval_loss": 4.383749961853027, "eval_runtime": 12.8695, "eval_samples_per_second": 0.233, "eval_steps_per_second": 0.078, "step": 89 }, { "epoch": 90.0, "eval_bleu": 11.4092, "eval_gen_len": 69.0, "eval_loss": 4.383831977844238, "eval_runtime": 12.8527, "eval_samples_per_second": 0.233, "eval_steps_per_second": 0.078, "step": 90 }, { "epoch": 91.0, "eval_bleu": 11.4092, "eval_gen_len": 69.0, "eval_loss": 4.383894920349121, "eval_runtime": 12.7272, "eval_samples_per_second": 0.236, "eval_steps_per_second": 0.079, "step": 91 }, { "epoch": 92.0, "eval_bleu": 11.4092, "eval_gen_len": 69.0, "eval_loss": 4.383920669555664, "eval_runtime": 14.3034, "eval_samples_per_second": 0.21, "eval_steps_per_second": 0.07, "step": 92 }, { "epoch": 93.0, "eval_bleu": 11.4092, "eval_gen_len": 69.0, "eval_loss": 4.38393497467041, "eval_runtime": 12.4454, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.08, "step": 93 }, { "epoch": 94.0, "eval_bleu": 11.4092, "eval_gen_len": 69.0, "eval_loss": 4.384005546569824, "eval_runtime": 12.8192, "eval_samples_per_second": 0.234, "eval_steps_per_second": 0.078, "step": 94 }, { "epoch": 95.0, "eval_bleu": 11.4092, "eval_gen_len": 69.0, "eval_loss": 4.3840460777282715, "eval_runtime": 12.4402, "eval_samples_per_second": 0.241, "eval_steps_per_second": 0.08, "step": 95 }, { "epoch": 96.0, "eval_bleu": 10.7891, "eval_gen_len": 70.6667, "eval_loss": 4.384109020233154, "eval_runtime": 12.8195, "eval_samples_per_second": 0.234, "eval_steps_per_second": 0.078, "step": 96 }, { "epoch": 97.0, "eval_bleu": 10.7891, "eval_gen_len": 70.6667, "eval_loss": 4.384162902832031, "eval_runtime": 12.3694, "eval_samples_per_second": 0.243, "eval_steps_per_second": 0.081, "step": 97 }, { "epoch": 98.0, "eval_bleu": 10.7891, "eval_gen_len": 70.6667, "eval_loss": 4.384200096130371, "eval_runtime": 12.8132, "eval_samples_per_second": 0.234, "eval_steps_per_second": 0.078, "step": 98 }, { "epoch": 99.0, "eval_bleu": 10.7891, "eval_gen_len": 70.6667, "eval_loss": 4.384230613708496, "eval_runtime": 12.6103, "eval_samples_per_second": 0.238, "eval_steps_per_second": 0.079, "step": 99 }, { "epoch": 100.0, "eval_bleu": 10.7891, "eval_gen_len": 70.6667, "eval_loss": 4.384246349334717, "eval_runtime": 12.8394, "eval_samples_per_second": 0.234, "eval_steps_per_second": 0.078, "step": 100 } ], "logging_steps": 500, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7786030694400.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }