|
{ |
|
"best_metric": 28.9507, |
|
"best_model_checkpoint": "./ko-en_mbartLarge_exp20p/checkpoint-48000", |
|
"epoch": 5.569091541942221, |
|
"eval_steps": 4000, |
|
"global_step": 48000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.9162, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.8085, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 1.6542, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5e-05, |
|
"loss": 1.543, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.999973747616574e-05, |
|
"loss": 1.4815, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.999894991017647e-05, |
|
"loss": 1.4549, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.999763731857256e-05, |
|
"loss": 1.4233, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.9995799728920956e-05, |
|
"loss": 1.4008, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_bleu": 22.7174, |
|
"eval_gen_len": 18.7094, |
|
"eval_loss": 1.3739314079284668, |
|
"eval_runtime": 1153.4033, |
|
"eval_samples_per_second": 14.944, |
|
"eval_steps_per_second": 0.935, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.9993437179814526e-05, |
|
"loss": 1.3868, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.999054972087132e-05, |
|
"loss": 1.3612, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.9987137412733476e-05, |
|
"loss": 1.3519, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.998320032706597e-05, |
|
"loss": 1.351, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.997873854655512e-05, |
|
"loss": 1.3285, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.9973752164906806e-05, |
|
"loss": 1.2959, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.9968241286844556e-05, |
|
"loss": 1.3041, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.996220602810732e-05, |
|
"loss": 1.2847, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_bleu": 24.8557, |
|
"eval_gen_len": 18.7254, |
|
"eval_loss": 1.2651795148849487, |
|
"eval_runtime": 1152.6426, |
|
"eval_samples_per_second": 14.954, |
|
"eval_steps_per_second": 0.935, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.995564651544704e-05, |
|
"loss": 1.2796, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.994856288662599e-05, |
|
"loss": 1.2328, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.9940955290413884e-05, |
|
"loss": 1.2225, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.993282388658474e-05, |
|
"loss": 1.2061, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.992416884591354e-05, |
|
"loss": 1.2152, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.991499035017266e-05, |
|
"loss": 1.1955, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.9905288592127996e-05, |
|
"loss": 1.1855, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.9895063775534965e-05, |
|
"loss": 1.2009, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_bleu": 26.2074, |
|
"eval_gen_len": 18.7513, |
|
"eval_loss": 1.20820152759552, |
|
"eval_runtime": 1153.2349, |
|
"eval_samples_per_second": 14.947, |
|
"eval_steps_per_second": 0.935, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.9884316115134224e-05, |
|
"loss": 1.1941, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.987304583664712e-05, |
|
"loss": 1.1982, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.986125317677099e-05, |
|
"loss": 1.188, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.98489383831742e-05, |
|
"loss": 1.1825, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.983610171449087e-05, |
|
"loss": 1.1948, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.982274344031553e-05, |
|
"loss": 1.1476, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.980886384119741e-05, |
|
"loss": 1.1627, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 4.979446320863455e-05, |
|
"loss": 1.1686, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_bleu": 26.304, |
|
"eval_gen_len": 19.161, |
|
"eval_loss": 1.1840966939926147, |
|
"eval_runtime": 1166.1525, |
|
"eval_samples_per_second": 14.781, |
|
"eval_steps_per_second": 0.924, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 4.97795418450677e-05, |
|
"loss": 1.1675, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.976410006387394e-05, |
|
"loss": 1.1585, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.974813818936012e-05, |
|
"loss": 1.0833, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.973165655675605e-05, |
|
"loss": 1.0134, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 4.9714655512207414e-05, |
|
"loss": 1.0257, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 4.969713541276859e-05, |
|
"loss": 1.0206, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.967909662639506e-05, |
|
"loss": 1.0503, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.9660539531935746e-05, |
|
"loss": 1.0205, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_bleu": 27.8937, |
|
"eval_gen_len": 18.6638, |
|
"eval_loss": 1.144053339958191, |
|
"eval_runtime": 1147.812, |
|
"eval_samples_per_second": 15.017, |
|
"eval_steps_per_second": 0.939, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 4.9641464519125e-05, |
|
"loss": 1.032, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.962187198857447e-05, |
|
"loss": 1.0121, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 4.960176235176465e-05, |
|
"loss": 1.013, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.958113603103627e-05, |
|
"loss": 1.0169, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 4.9559993459581375e-05, |
|
"loss": 1.0315, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 4.953833508143429e-05, |
|
"loss": 1.0431, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.951616135146226e-05, |
|
"loss": 1.0398, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.949347273535588e-05, |
|
"loss": 1.0217, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_bleu": 28.4149, |
|
"eval_gen_len": 18.6666, |
|
"eval_loss": 1.1301122903823853, |
|
"eval_runtime": 1149.3346, |
|
"eval_samples_per_second": 14.997, |
|
"eval_steps_per_second": 0.938, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.9470269709619356e-05, |
|
"loss": 1.0313, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 4.944655276156047e-05, |
|
"loss": 1.0127, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.9422322389280354e-05, |
|
"loss": 1.0183, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.939757910166303e-05, |
|
"loss": 0.9723, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 4.937232341836471e-05, |
|
"loss": 0.8652, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 4.9346555869802904e-05, |
|
"loss": 0.8811, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 4.9320276997145255e-05, |
|
"loss": 0.8824, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 4.929348735229821e-05, |
|
"loss": 0.8876, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_bleu": 28.5803, |
|
"eval_gen_len": 18.6229, |
|
"eval_loss": 1.1270363330841064, |
|
"eval_runtime": 1153.2981, |
|
"eval_samples_per_second": 14.946, |
|
"eval_steps_per_second": 0.935, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 4.926618749789539e-05, |
|
"loss": 0.8734, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 4.923837800728578e-05, |
|
"loss": 0.8772, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 4.921005946452171e-05, |
|
"loss": 0.8821, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 4.918123246434658e-05, |
|
"loss": 0.8774, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 4.915189761218237e-05, |
|
"loss": 0.8932, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 4.912205552411688e-05, |
|
"loss": 0.8745, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 4.9091706826890896e-05, |
|
"loss": 0.8965, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 4.9060852157884893e-05, |
|
"loss": 0.9024, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_bleu": 28.852, |
|
"eval_gen_len": 18.7813, |
|
"eval_loss": 1.118120551109314, |
|
"eval_runtime": 1155.1982, |
|
"eval_samples_per_second": 14.921, |
|
"eval_steps_per_second": 0.933, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 4.9029492165105776e-05, |
|
"loss": 0.8909, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 4.899762750717318e-05, |
|
"loss": 0.8989, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 4.8965258853305676e-05, |
|
"loss": 0.9054, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 4.893238688330673e-05, |
|
"loss": 0.8852, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.889901228755036e-05, |
|
"loss": 0.8879, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 4.886513576696674e-05, |
|
"loss": 0.7955, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 4.8830758033027385e-05, |
|
"loss": 0.795, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 4.879587980773026e-05, |
|
"loss": 0.7927, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_bleu": 28.3975, |
|
"eval_gen_len": 18.4863, |
|
"eval_loss": 1.1393358707427979, |
|
"eval_runtime": 1137.7417, |
|
"eval_samples_per_second": 15.15, |
|
"eval_steps_per_second": 0.947, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 4.8760501823584607e-05, |
|
"loss": 0.8031, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 4.8724624823595535e-05, |
|
"loss": 0.7891, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 4.8688249561248456e-05, |
|
"loss": 0.8028, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 4.8651376800493245e-05, |
|
"loss": 0.8059, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 4.861400731572818e-05, |
|
"loss": 0.8093, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 4.857614189178369e-05, |
|
"loss": 0.8044, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 4.8537781323905896e-05, |
|
"loss": 0.8098, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 4.849892641773984e-05, |
|
"loss": 0.8174, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"eval_bleu": 28.6313, |
|
"eval_gen_len": 18.3916, |
|
"eval_loss": 1.124934434890747, |
|
"eval_runtime": 1132.4078, |
|
"eval_samples_per_second": 15.222, |
|
"eval_steps_per_second": 0.952, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 4.845957798931265e-05, |
|
"loss": 0.821, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 4.841973686501636e-05, |
|
"loss": 0.8104, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 4.8379403881590544e-05, |
|
"loss": 0.8155, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 4.833857988610474e-05, |
|
"loss": 0.8213, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 4.8297265735940714e-05, |
|
"loss": 0.819, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 4.825546229877439e-05, |
|
"loss": 0.8255, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 4.8213170452557655e-05, |
|
"loss": 0.7598, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 4.817039108549991e-05, |
|
"loss": 0.7434, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"eval_bleu": 28.2898, |
|
"eval_gen_len": 18.7739, |
|
"eval_loss": 1.1695784330368042, |
|
"eval_runtime": 1160.4522, |
|
"eval_samples_per_second": 14.854, |
|
"eval_steps_per_second": 0.929, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 4.812712509604945e-05, |
|
"loss": 0.7564, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 4.808337339287453e-05, |
|
"loss": 0.7496, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 4.803913689484436e-05, |
|
"loss": 0.7381, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 4.7994416531009745e-05, |
|
"loss": 0.7394, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 4.794921324058358e-05, |
|
"loss": 0.7468, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 4.790352797292116e-05, |
|
"loss": 0.7448, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 4.785736168750022e-05, |
|
"loss": 0.7339, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 4.781071535390078e-05, |
|
"loss": 0.7416, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_bleu": 28.9507, |
|
"eval_gen_len": 18.6744, |
|
"eval_loss": 1.1450848579406738, |
|
"eval_runtime": 1148.6011, |
|
"eval_samples_per_second": 15.007, |
|
"eval_steps_per_second": 0.939, |
|
"step": 48000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 344760, |
|
"num_train_epochs": 40, |
|
"save_steps": 4000, |
|
"total_flos": 1.6644596837352735e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|