{ "best_metric": 28.9507, "best_model_checkpoint": "./ko-en_mbartLarge_exp20p/checkpoint-48000", "epoch": 7.425455389256294, "eval_steps": 4000, "global_step": 64000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 1.25e-05, "loss": 1.9162, "step": 500 }, { "epoch": 0.12, "learning_rate": 2.5e-05, "loss": 1.8085, "step": 1000 }, { "epoch": 0.17, "learning_rate": 3.7500000000000003e-05, "loss": 1.6542, "step": 1500 }, { "epoch": 0.23, "learning_rate": 5e-05, "loss": 1.543, "step": 2000 }, { "epoch": 0.29, "learning_rate": 4.999973747616574e-05, "loss": 1.4815, "step": 2500 }, { "epoch": 0.35, "learning_rate": 4.999894991017647e-05, "loss": 1.4549, "step": 3000 }, { "epoch": 0.41, "learning_rate": 4.999763731857256e-05, "loss": 1.4233, "step": 3500 }, { "epoch": 0.46, "learning_rate": 4.9995799728920956e-05, "loss": 1.4008, "step": 4000 }, { "epoch": 0.46, "eval_bleu": 22.7174, "eval_gen_len": 18.7094, "eval_loss": 1.3739314079284668, "eval_runtime": 1153.4033, "eval_samples_per_second": 14.944, "eval_steps_per_second": 0.935, "step": 4000 }, { "epoch": 0.52, "learning_rate": 4.9993437179814526e-05, "loss": 1.3868, "step": 4500 }, { "epoch": 0.58, "learning_rate": 4.999054972087132e-05, "loss": 1.3612, "step": 5000 }, { "epoch": 0.64, "learning_rate": 4.9987137412733476e-05, "loss": 1.3519, "step": 5500 }, { "epoch": 0.7, "learning_rate": 4.998320032706597e-05, "loss": 1.351, "step": 6000 }, { "epoch": 0.75, "learning_rate": 4.997873854655512e-05, "loss": 1.3285, "step": 6500 }, { "epoch": 0.81, "learning_rate": 4.9973752164906806e-05, "loss": 1.2959, "step": 7000 }, { "epoch": 0.87, "learning_rate": 4.9968241286844556e-05, "loss": 1.3041, "step": 7500 }, { "epoch": 0.93, "learning_rate": 4.996220602810732e-05, "loss": 1.2847, "step": 8000 }, { "epoch": 0.93, "eval_bleu": 24.8557, "eval_gen_len": 18.7254, "eval_loss": 1.2651795148849487, "eval_runtime": 1152.6426, "eval_samples_per_second": 14.954, "eval_steps_per_second": 0.935, "step": 8000 }, { "epoch": 0.99, "learning_rate": 4.995564651544704e-05, "loss": 1.2796, "step": 8500 }, { "epoch": 1.04, "learning_rate": 4.994856288662599e-05, "loss": 1.2328, "step": 9000 }, { "epoch": 1.1, "learning_rate": 4.9940955290413884e-05, "loss": 1.2225, "step": 9500 }, { "epoch": 1.16, "learning_rate": 4.993282388658474e-05, "loss": 1.2061, "step": 10000 }, { "epoch": 1.22, "learning_rate": 4.992416884591354e-05, "loss": 1.2152, "step": 10500 }, { "epoch": 1.28, "learning_rate": 4.991499035017266e-05, "loss": 1.1955, "step": 11000 }, { "epoch": 1.33, "learning_rate": 4.9905288592127996e-05, "loss": 1.1855, "step": 11500 }, { "epoch": 1.39, "learning_rate": 4.9895063775534965e-05, "loss": 1.2009, "step": 12000 }, { "epoch": 1.39, "eval_bleu": 26.2074, "eval_gen_len": 18.7513, "eval_loss": 1.20820152759552, "eval_runtime": 1153.2349, "eval_samples_per_second": 14.947, "eval_steps_per_second": 0.935, "step": 12000 }, { "epoch": 1.45, "learning_rate": 4.9884316115134224e-05, "loss": 1.1941, "step": 12500 }, { "epoch": 1.51, "learning_rate": 4.987304583664712e-05, "loss": 1.1982, "step": 13000 }, { "epoch": 1.57, "learning_rate": 4.986125317677099e-05, "loss": 1.188, "step": 13500 }, { "epoch": 1.62, "learning_rate": 4.98489383831742e-05, "loss": 1.1825, "step": 14000 }, { "epoch": 1.68, "learning_rate": 4.983610171449087e-05, "loss": 1.1948, "step": 14500 }, { "epoch": 1.74, "learning_rate": 4.982274344031553e-05, "loss": 1.1476, "step": 15000 }, { "epoch": 1.8, "learning_rate": 4.980886384119741e-05, "loss": 1.1627, "step": 15500 }, { "epoch": 1.86, "learning_rate": 4.979446320863455e-05, "loss": 1.1686, "step": 16000 }, { "epoch": 1.86, "eval_bleu": 26.304, "eval_gen_len": 19.161, "eval_loss": 1.1840966939926147, "eval_runtime": 1166.1525, "eval_samples_per_second": 14.781, "eval_steps_per_second": 0.924, "step": 16000 }, { "epoch": 1.91, "learning_rate": 4.97795418450677e-05, "loss": 1.1675, "step": 16500 }, { "epoch": 1.97, "learning_rate": 4.976410006387394e-05, "loss": 1.1585, "step": 17000 }, { "epoch": 2.03, "learning_rate": 4.974813818936012e-05, "loss": 1.0833, "step": 17500 }, { "epoch": 2.09, "learning_rate": 4.973165655675605e-05, "loss": 1.0134, "step": 18000 }, { "epoch": 2.15, "learning_rate": 4.9714655512207414e-05, "loss": 1.0257, "step": 18500 }, { "epoch": 2.2, "learning_rate": 4.969713541276859e-05, "loss": 1.0206, "step": 19000 }, { "epoch": 2.26, "learning_rate": 4.967909662639506e-05, "loss": 1.0503, "step": 19500 }, { "epoch": 2.32, "learning_rate": 4.9660539531935746e-05, "loss": 1.0205, "step": 20000 }, { "epoch": 2.32, "eval_bleu": 27.8937, "eval_gen_len": 18.6638, "eval_loss": 1.144053339958191, "eval_runtime": 1147.812, "eval_samples_per_second": 15.017, "eval_steps_per_second": 0.939, "step": 20000 }, { "epoch": 2.38, "learning_rate": 4.9641464519125e-05, "loss": 1.032, "step": 20500 }, { "epoch": 2.44, "learning_rate": 4.962187198857447e-05, "loss": 1.0121, "step": 21000 }, { "epoch": 2.49, "learning_rate": 4.960176235176465e-05, "loss": 1.013, "step": 21500 }, { "epoch": 2.55, "learning_rate": 4.958113603103627e-05, "loss": 1.0169, "step": 22000 }, { "epoch": 2.61, "learning_rate": 4.9559993459581375e-05, "loss": 1.0315, "step": 22500 }, { "epoch": 2.67, "learning_rate": 4.953833508143429e-05, "loss": 1.0431, "step": 23000 }, { "epoch": 2.73, "learning_rate": 4.951616135146226e-05, "loss": 1.0398, "step": 23500 }, { "epoch": 2.78, "learning_rate": 4.949347273535588e-05, "loss": 1.0217, "step": 24000 }, { "epoch": 2.78, "eval_bleu": 28.4149, "eval_gen_len": 18.6666, "eval_loss": 1.1301122903823853, "eval_runtime": 1149.3346, "eval_samples_per_second": 14.997, "eval_steps_per_second": 0.938, "step": 24000 }, { "epoch": 2.84, "learning_rate": 4.9470269709619356e-05, "loss": 1.0313, "step": 24500 }, { "epoch": 2.9, "learning_rate": 4.944655276156047e-05, "loss": 1.0127, "step": 25000 }, { "epoch": 2.96, "learning_rate": 4.9422322389280354e-05, "loss": 1.0183, "step": 25500 }, { "epoch": 3.02, "learning_rate": 4.939757910166303e-05, "loss": 0.9723, "step": 26000 }, { "epoch": 3.07, "learning_rate": 4.937232341836471e-05, "loss": 0.8652, "step": 26500 }, { "epoch": 3.13, "learning_rate": 4.9346555869802904e-05, "loss": 0.8811, "step": 27000 }, { "epoch": 3.19, "learning_rate": 4.9320276997145255e-05, "loss": 0.8824, "step": 27500 }, { "epoch": 3.25, "learning_rate": 4.929348735229821e-05, "loss": 0.8876, "step": 28000 }, { "epoch": 3.25, "eval_bleu": 28.5803, "eval_gen_len": 18.6229, "eval_loss": 1.1270363330841064, "eval_runtime": 1153.2981, "eval_samples_per_second": 14.946, "eval_steps_per_second": 0.935, "step": 28000 }, { "epoch": 3.31, "learning_rate": 4.926618749789539e-05, "loss": 0.8734, "step": 28500 }, { "epoch": 3.36, "learning_rate": 4.923837800728578e-05, "loss": 0.8772, "step": 29000 }, { "epoch": 3.42, "learning_rate": 4.921005946452171e-05, "loss": 0.8821, "step": 29500 }, { "epoch": 3.48, "learning_rate": 4.918123246434658e-05, "loss": 0.8774, "step": 30000 }, { "epoch": 3.54, "learning_rate": 4.915189761218237e-05, "loss": 0.8932, "step": 30500 }, { "epoch": 3.6, "learning_rate": 4.912205552411688e-05, "loss": 0.8745, "step": 31000 }, { "epoch": 3.65, "learning_rate": 4.9091706826890896e-05, "loss": 0.8965, "step": 31500 }, { "epoch": 3.71, "learning_rate": 4.9060852157884893e-05, "loss": 0.9024, "step": 32000 }, { "epoch": 3.71, "eval_bleu": 28.852, "eval_gen_len": 18.7813, "eval_loss": 1.118120551109314, "eval_runtime": 1155.1982, "eval_samples_per_second": 14.921, "eval_steps_per_second": 0.933, "step": 32000 }, { "epoch": 3.77, "learning_rate": 4.9029492165105776e-05, "loss": 0.8909, "step": 32500 }, { "epoch": 3.83, "learning_rate": 4.899762750717318e-05, "loss": 0.8989, "step": 33000 }, { "epoch": 3.89, "learning_rate": 4.8965258853305676e-05, "loss": 0.9054, "step": 33500 }, { "epoch": 3.94, "learning_rate": 4.893238688330673e-05, "loss": 0.8852, "step": 34000 }, { "epoch": 4.0, "learning_rate": 4.889901228755036e-05, "loss": 0.8879, "step": 34500 }, { "epoch": 4.06, "learning_rate": 4.886513576696674e-05, "loss": 0.7955, "step": 35000 }, { "epoch": 4.12, "learning_rate": 4.8830758033027385e-05, "loss": 0.795, "step": 35500 }, { "epoch": 4.18, "learning_rate": 4.879587980773026e-05, "loss": 0.7927, "step": 36000 }, { "epoch": 4.18, "eval_bleu": 28.3975, "eval_gen_len": 18.4863, "eval_loss": 1.1393358707427979, "eval_runtime": 1137.7417, "eval_samples_per_second": 15.15, "eval_steps_per_second": 0.947, "step": 36000 }, { "epoch": 4.23, "learning_rate": 4.8760501823584607e-05, "loss": 0.8031, "step": 36500 }, { "epoch": 4.29, "learning_rate": 4.8724624823595535e-05, "loss": 0.7891, "step": 37000 }, { "epoch": 4.35, "learning_rate": 4.8688249561248456e-05, "loss": 0.8028, "step": 37500 }, { "epoch": 4.41, "learning_rate": 4.8651376800493245e-05, "loss": 0.8059, "step": 38000 }, { "epoch": 4.47, "learning_rate": 4.861400731572818e-05, "loss": 0.8093, "step": 38500 }, { "epoch": 4.52, "learning_rate": 4.857614189178369e-05, "loss": 0.8044, "step": 39000 }, { "epoch": 4.58, "learning_rate": 4.8537781323905896e-05, "loss": 0.8098, "step": 39500 }, { "epoch": 4.64, "learning_rate": 4.849892641773984e-05, "loss": 0.8174, "step": 40000 }, { "epoch": 4.64, "eval_bleu": 28.6313, "eval_gen_len": 18.3916, "eval_loss": 1.124934434890747, "eval_runtime": 1132.4078, "eval_samples_per_second": 15.222, "eval_steps_per_second": 0.952, "step": 40000 }, { "epoch": 4.7, "learning_rate": 4.845957798931265e-05, "loss": 0.821, "step": 40500 }, { "epoch": 4.76, "learning_rate": 4.841973686501636e-05, "loss": 0.8104, "step": 41000 }, { "epoch": 4.81, "learning_rate": 4.8379403881590544e-05, "loss": 0.8155, "step": 41500 }, { "epoch": 4.87, "learning_rate": 4.833857988610474e-05, "loss": 0.8213, "step": 42000 }, { "epoch": 4.93, "learning_rate": 4.8297265735940714e-05, "loss": 0.819, "step": 42500 }, { "epoch": 4.99, "learning_rate": 4.825546229877439e-05, "loss": 0.8255, "step": 43000 }, { "epoch": 5.05, "learning_rate": 4.8213170452557655e-05, "loss": 0.7598, "step": 43500 }, { "epoch": 5.11, "learning_rate": 4.817039108549991e-05, "loss": 0.7434, "step": 44000 }, { "epoch": 5.11, "eval_bleu": 28.2898, "eval_gen_len": 18.7739, "eval_loss": 1.1695784330368042, "eval_runtime": 1160.4522, "eval_samples_per_second": 14.854, "eval_steps_per_second": 0.929, "step": 44000 }, { "epoch": 5.16, "learning_rate": 4.812712509604945e-05, "loss": 0.7564, "step": 44500 }, { "epoch": 5.22, "learning_rate": 4.808337339287453e-05, "loss": 0.7496, "step": 45000 }, { "epoch": 5.28, "learning_rate": 4.803913689484436e-05, "loss": 0.7381, "step": 45500 }, { "epoch": 5.34, "learning_rate": 4.7994416531009745e-05, "loss": 0.7394, "step": 46000 }, { "epoch": 5.4, "learning_rate": 4.794921324058358e-05, "loss": 0.7468, "step": 46500 }, { "epoch": 5.45, "learning_rate": 4.790352797292116e-05, "loss": 0.7448, "step": 47000 }, { "epoch": 5.51, "learning_rate": 4.785736168750022e-05, "loss": 0.7339, "step": 47500 }, { "epoch": 5.57, "learning_rate": 4.781071535390078e-05, "loss": 0.7416, "step": 48000 }, { "epoch": 5.57, "eval_bleu": 28.9507, "eval_gen_len": 18.6744, "eval_loss": 1.1450848579406738, "eval_runtime": 1148.6011, "eval_samples_per_second": 15.007, "eval_steps_per_second": 0.939, "step": 48000 }, { "epoch": 5.63, "learning_rate": 4.7763589951784795e-05, "loss": 0.7509, "step": 48500 }, { "epoch": 5.69, "learning_rate": 4.771598647087556e-05, "loss": 0.7383, "step": 49000 }, { "epoch": 5.74, "learning_rate": 4.7667905910936926e-05, "loss": 0.735, "step": 49500 }, { "epoch": 5.8, "learning_rate": 4.761934928175236e-05, "loss": 0.7541, "step": 50000 }, { "epoch": 5.86, "learning_rate": 4.7570317603103634e-05, "loss": 0.7554, "step": 50500 }, { "epoch": 5.92, "learning_rate": 4.75208119047495e-05, "loss": 0.7511, "step": 51000 }, { "epoch": 5.98, "learning_rate": 4.747083322640401e-05, "loss": 0.7599, "step": 51500 }, { "epoch": 6.03, "learning_rate": 4.742038261771472e-05, "loss": 0.689, "step": 52000 }, { "epoch": 6.03, "eval_bleu": 28.3532, "eval_gen_len": 18.4481, "eval_loss": 1.1758701801300049, "eval_runtime": 1136.8302, "eval_samples_per_second": 15.162, "eval_steps_per_second": 0.948, "step": 52000 }, { "epoch": 6.09, "learning_rate": 4.73694611382406e-05, "loss": 0.6386, "step": 52500 }, { "epoch": 6.15, "learning_rate": 4.7318069857429815e-05, "loss": 0.6589, "step": 53000 }, { "epoch": 6.21, "learning_rate": 4.7266209854597246e-05, "loss": 0.7347, "step": 53500 }, { "epoch": 6.27, "learning_rate": 4.721388221890185e-05, "loss": 0.7288, "step": 54000 }, { "epoch": 6.32, "learning_rate": 4.716108804932374e-05, "loss": 0.7354, "step": 54500 }, { "epoch": 6.38, "learning_rate": 4.7107828454641146e-05, "loss": 0.7334, "step": 55000 }, { "epoch": 6.44, "learning_rate": 4.7054104553407104e-05, "loss": 0.7244, "step": 55500 }, { "epoch": 6.5, "learning_rate": 4.699991747392598e-05, "loss": 0.7238, "step": 56000 }, { "epoch": 6.5, "eval_bleu": 28.3827, "eval_gen_len": 18.7038, "eval_loss": 1.1825261116027832, "eval_runtime": 1155.4906, "eval_samples_per_second": 14.917, "eval_steps_per_second": 0.933, "step": 56000 }, { "epoch": 6.56, "learning_rate": 4.6945268354229766e-05, "loss": 0.7205, "step": 56500 }, { "epoch": 6.61, "learning_rate": 4.689015834205418e-05, "loss": 0.7157, "step": 57000 }, { "epoch": 6.67, "learning_rate": 4.6834588594814556e-05, "loss": 0.7078, "step": 57500 }, { "epoch": 6.73, "learning_rate": 4.677856027958154e-05, "loss": 0.71, "step": 58000 }, { "epoch": 6.79, "learning_rate": 4.6722074573056576e-05, "loss": 0.7082, "step": 58500 }, { "epoch": 6.85, "learning_rate": 4.6665132661547215e-05, "loss": 0.7258, "step": 59000 }, { "epoch": 6.9, "learning_rate": 4.660773574094218e-05, "loss": 0.7306, "step": 59500 }, { "epoch": 6.96, "learning_rate": 4.6549885016686236e-05, "loss": 0.7238, "step": 60000 }, { "epoch": 6.96, "eval_bleu": 28.8248, "eval_gen_len": 18.5073, "eval_loss": 1.1675989627838135, "eval_runtime": 1137.8605, "eval_samples_per_second": 15.149, "eval_steps_per_second": 0.947, "step": 60000 }, { "epoch": 7.02, "learning_rate": 4.649158170375489e-05, "loss": 0.681, "step": 60500 }, { "epoch": 7.08, "learning_rate": 4.6432827026628894e-05, "loss": 0.571, "step": 61000 }, { "epoch": 7.14, "learning_rate": 4.63736222192685e-05, "loss": 0.5642, "step": 61500 }, { "epoch": 7.19, "learning_rate": 4.631396852508754e-05, "loss": 0.5604, "step": 62000 }, { "epoch": 7.25, "learning_rate": 4.625386719692734e-05, "loss": 0.5799, "step": 62500 }, { "epoch": 7.31, "learning_rate": 4.61933194970304e-05, "loss": 0.5989, "step": 63000 }, { "epoch": 7.37, "learning_rate": 4.613232669701384e-05, "loss": 0.6339, "step": 63500 }, { "epoch": 7.43, "learning_rate": 4.607089007784279e-05, "loss": 0.657, "step": 64000 }, { "epoch": 7.43, "eval_bleu": 27.4378, "eval_gen_len": 18.4196, "eval_loss": 1.2514448165893555, "eval_runtime": 1132.4116, "eval_samples_per_second": 15.221, "eval_steps_per_second": 0.952, "step": 64000 }, { "epoch": 7.43, "step": 64000, "total_flos": 2.2192449043268895e+18, "train_loss": 0.9512434206008911, "train_runtime": 65044.6534, "train_samples_per_second": 84.803, "train_steps_per_second": 5.3 } ], "logging_steps": 500, "max_steps": 344760, "num_train_epochs": 40, "save_steps": 4000, "total_flos": 2.2192449043268895e+18, "trial_name": null, "trial_params": null }