{ "best_metric": 28.8215, "best_model_checkpoint": "./ko-en_mbartLarge_exp10p/checkpoint-32000", "epoch": 6.188118811881188, "eval_steps": 2000, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 2.5e-05, "loss": 1.9221, "step": 500 }, { "epoch": 0.15, "learning_rate": 5e-05, "loss": 1.7751, "step": 1000 }, { "epoch": 0.23, "learning_rate": 4.9999535065698766e-05, "loss": 1.5628, "step": 1500 }, { "epoch": 0.31, "learning_rate": 4.9998140280088176e-05, "loss": 1.4782, "step": 2000 }, { "epoch": 0.31, "eval_bleu": 21.538, "eval_gen_len": 18.6032, "eval_loss": 1.435985803604126, "eval_runtime": 861.7869, "eval_samples_per_second": 15.0, "eval_steps_per_second": 0.938, "step": 2000 }, { "epoch": 0.39, "learning_rate": 4.999581569504692e-05, "loss": 1.4515, "step": 2500 }, { "epoch": 0.46, "learning_rate": 4.999256139703734e-05, "loss": 1.4195, "step": 3000 }, { "epoch": 0.54, "learning_rate": 4.998837750710223e-05, "loss": 1.3946, "step": 3500 }, { "epoch": 0.62, "learning_rate": 4.99832641808603e-05, "loss": 1.3618, "step": 4000 }, { "epoch": 0.62, "eval_bleu": 23.8354, "eval_gen_len": 18.5594, "eval_loss": 1.3225533962249756, "eval_runtime": 848.5716, "eval_samples_per_second": 15.234, "eval_steps_per_second": 0.952, "step": 4000 }, { "epoch": 0.7, "learning_rate": 4.99772216085004e-05, "loss": 1.3398, "step": 4500 }, { "epoch": 0.77, "learning_rate": 4.9970250014774486e-05, "loss": 1.3278, "step": 5000 }, { "epoch": 0.85, "learning_rate": 4.996234965898918e-05, "loss": 1.3002, "step": 5500 }, { "epoch": 0.93, "learning_rate": 4.9953520834996206e-05, "loss": 1.2983, "step": 6000 }, { "epoch": 0.93, "eval_bleu": 25.0795, "eval_gen_len": 18.7894, "eval_loss": 1.2636672258377075, "eval_runtime": 866.6867, "eval_samples_per_second": 14.915, "eval_steps_per_second": 0.932, "step": 6000 }, { "epoch": 1.01, "learning_rate": 4.994376387118141e-05, "loss": 1.2851, "step": 6500 }, { "epoch": 1.08, "learning_rate": 4.993307913045257e-05, "loss": 1.2246, "step": 7000 }, { "epoch": 1.16, "learning_rate": 4.9921467010225866e-05, "loss": 1.2111, "step": 7500 }, { "epoch": 1.24, "learning_rate": 4.9908927942411154e-05, "loss": 1.2065, "step": 8000 }, { "epoch": 1.24, "eval_bleu": 25.7409, "eval_gen_len": 18.5615, "eval_loss": 1.237057089805603, "eval_runtime": 857.6049, "eval_samples_per_second": 15.073, "eval_steps_per_second": 0.942, "step": 8000 }, { "epoch": 1.31, "learning_rate": 4.989546239339585e-05, "loss": 1.201, "step": 8500 }, { "epoch": 1.39, "learning_rate": 4.98810708640276e-05, "loss": 1.2098, "step": 9000 }, { "epoch": 1.47, "learning_rate": 4.986575388959566e-05, "loss": 1.1832, "step": 9500 }, { "epoch": 1.55, "learning_rate": 4.984951203981097e-05, "loss": 1.1926, "step": 10000 }, { "epoch": 1.55, "eval_bleu": 26.0527, "eval_gen_len": 18.4019, "eval_loss": 1.2115644216537476, "eval_runtime": 851.6364, "eval_samples_per_second": 15.179, "eval_steps_per_second": 0.949, "step": 10000 }, { "epoch": 1.62, "learning_rate": 4.983234591878498e-05, "loss": 1.1858, "step": 10500 }, { "epoch": 1.7, "learning_rate": 4.981425616500717e-05, "loss": 1.1939, "step": 11000 }, { "epoch": 1.78, "learning_rate": 4.9795243451321304e-05, "loss": 1.1693, "step": 11500 }, { "epoch": 1.86, "learning_rate": 4.977530848490039e-05, "loss": 1.1734, "step": 12000 }, { "epoch": 1.86, "eval_bleu": 26.9802, "eval_gen_len": 18.6141, "eval_loss": 1.190738558769226, "eval_runtime": 858.138, "eval_samples_per_second": 15.064, "eval_steps_per_second": 0.942, "step": 12000 }, { "epoch": 1.93, "learning_rate": 4.975445200722042e-05, "loss": 1.173, "step": 12500 }, { "epoch": 2.01, "learning_rate": 4.973267479403273e-05, "loss": 1.1541, "step": 13000 }, { "epoch": 2.09, "learning_rate": 4.9709977655335196e-05, "loss": 1.0598, "step": 13500 }, { "epoch": 2.17, "learning_rate": 4.968636143534208e-05, "loss": 1.0677, "step": 14000 }, { "epoch": 2.17, "eval_bleu": 27.1925, "eval_gen_len": 18.4547, "eval_loss": 1.1801778078079224, "eval_runtime": 850.3421, "eval_samples_per_second": 15.202, "eval_steps_per_second": 0.95, "step": 14000 }, { "epoch": 2.24, "learning_rate": 4.966182701245266e-05, "loss": 1.0789, "step": 14500 }, { "epoch": 2.32, "learning_rate": 4.9636375299218484e-05, "loss": 1.0706, "step": 15000 }, { "epoch": 2.4, "learning_rate": 4.961000724230954e-05, "loss": 1.082, "step": 15500 }, { "epoch": 2.48, "learning_rate": 4.958272382247895e-05, "loss": 1.0773, "step": 16000 }, { "epoch": 2.48, "eval_bleu": 27.5641, "eval_gen_len": 18.6726, "eval_loss": 1.1654949188232422, "eval_runtime": 856.0869, "eval_samples_per_second": 15.1, "eval_steps_per_second": 0.944, "step": 16000 }, { "epoch": 2.55, "learning_rate": 4.955452605452653e-05, "loss": 1.0749, "step": 16500 }, { "epoch": 2.63, "learning_rate": 4.952541498726105e-05, "loss": 1.0845, "step": 17000 }, { "epoch": 2.71, "learning_rate": 4.949539170346119e-05, "loss": 1.0725, "step": 17500 }, { "epoch": 2.78, "learning_rate": 4.9464457319835334e-05, "loss": 1.0688, "step": 18000 }, { "epoch": 2.78, "eval_bleu": 27.6261, "eval_gen_len": 18.6127, "eval_loss": 1.1520819664001465, "eval_runtime": 854.4229, "eval_samples_per_second": 15.13, "eval_steps_per_second": 0.946, "step": 18000 }, { "epoch": 2.86, "learning_rate": 4.9432612986979945e-05, "loss": 1.0801, "step": 18500 }, { "epoch": 2.94, "learning_rate": 4.939985988933683e-05, "loss": 1.0993, "step": 19000 }, { "epoch": 3.02, "learning_rate": 4.9366199245149095e-05, "loss": 1.0534, "step": 19500 }, { "epoch": 3.09, "learning_rate": 4.9331632306415776e-05, "loss": 0.9542, "step": 20000 }, { "epoch": 3.09, "eval_bleu": 27.16, "eval_gen_len": 18.3782, "eval_loss": 1.1709098815917969, "eval_runtime": 856.056, "eval_samples_per_second": 15.101, "eval_steps_per_second": 0.944, "step": 20000 }, { "epoch": 3.17, "learning_rate": 4.929616035884531e-05, "loss": 0.9688, "step": 20500 }, { "epoch": 3.25, "learning_rate": 4.925978472180771e-05, "loss": 0.9453, "step": 21000 }, { "epoch": 3.33, "learning_rate": 4.9222506748285495e-05, "loss": 0.9471, "step": 21500 }, { "epoch": 3.4, "learning_rate": 4.918432782482335e-05, "loss": 0.9531, "step": 22000 }, { "epoch": 3.4, "eval_bleu": 28.0684, "eval_gen_len": 18.436, "eval_loss": 1.1434855461120605, "eval_runtime": 852.3756, "eval_samples_per_second": 15.166, "eval_steps_per_second": 0.948, "step": 22000 }, { "epoch": 3.48, "learning_rate": 4.914524937147655e-05, "loss": 0.9478, "step": 22500 }, { "epoch": 3.56, "learning_rate": 4.910527284175818e-05, "loss": 0.9594, "step": 23000 }, { "epoch": 3.64, "learning_rate": 4.906439972258503e-05, "loss": 0.9483, "step": 23500 }, { "epoch": 3.71, "learning_rate": 4.902263153422231e-05, "loss": 0.9756, "step": 24000 }, { "epoch": 3.71, "eval_bleu": 27.6025, "eval_gen_len": 18.7284, "eval_loss": 1.1564555168151855, "eval_runtime": 860.0664, "eval_samples_per_second": 15.03, "eval_steps_per_second": 0.939, "step": 24000 }, { "epoch": 3.79, "learning_rate": 4.8979969830227086e-05, "loss": 0.9912, "step": 24500 }, { "epoch": 3.87, "learning_rate": 4.893641619739053e-05, "loss": 0.9789, "step": 25000 }, { "epoch": 3.94, "learning_rate": 4.8891972255678876e-05, "loss": 1.0068, "step": 25500 }, { "epoch": 4.02, "learning_rate": 4.8846639658173156e-05, "loss": 0.9964, "step": 26000 }, { "epoch": 4.02, "eval_bleu": 25.6999, "eval_gen_len": 18.3255, "eval_loss": 1.2285293340682983, "eval_runtime": 852.9084, "eval_samples_per_second": 15.156, "eval_steps_per_second": 0.947, "step": 26000 }, { "epoch": 4.1, "learning_rate": 4.880042009100772e-05, "loss": 0.9576, "step": 26500 }, { "epoch": 4.18, "learning_rate": 4.8753315273307575e-05, "loss": 0.9583, "step": 27000 }, { "epoch": 4.25, "learning_rate": 4.8705326957124334e-05, "loss": 0.9696, "step": 27500 }, { "epoch": 4.33, "learning_rate": 4.865645692737114e-05, "loss": 0.9721, "step": 28000 }, { "epoch": 4.33, "eval_bleu": 27.3499, "eval_gen_len": 18.5409, "eval_loss": 1.1880507469177246, "eval_runtime": 849.7595, "eval_samples_per_second": 15.213, "eval_steps_per_second": 0.951, "step": 28000 }, { "epoch": 4.41, "learning_rate": 4.860670700175625e-05, "loss": 0.9355, "step": 28500 }, { "epoch": 4.49, "learning_rate": 4.855607903071542e-05, "loss": 0.9314, "step": 29000 }, { "epoch": 4.56, "learning_rate": 4.850457489734306e-05, "loss": 0.9142, "step": 29500 }, { "epoch": 4.64, "learning_rate": 4.845219651732225e-05, "loss": 0.9237, "step": 30000 }, { "epoch": 4.64, "eval_bleu": 28.2692, "eval_gen_len": 18.6614, "eval_loss": 1.1497083902359009, "eval_runtime": 859.3006, "eval_samples_per_second": 15.044, "eval_steps_per_second": 0.94, "step": 30000 }, { "epoch": 4.72, "learning_rate": 4.839894583885341e-05, "loss": 0.9102, "step": 30500 }, { "epoch": 4.8, "learning_rate": 4.834482484258193e-05, "loss": 0.8956, "step": 31000 }, { "epoch": 4.87, "learning_rate": 4.8289835541524394e-05, "loss": 0.8902, "step": 31500 }, { "epoch": 4.95, "learning_rate": 4.8233979980993785e-05, "loss": 0.9041, "step": 32000 }, { "epoch": 4.95, "eval_bleu": 28.8215, "eval_gen_len": 18.5493, "eval_loss": 1.1282682418823242, "eval_runtime": 861.0284, "eval_samples_per_second": 15.013, "eval_steps_per_second": 0.938, "step": 32000 }, { "epoch": 5.03, "learning_rate": 4.817726023852338e-05, "loss": 0.8124, "step": 32500 }, { "epoch": 5.11, "learning_rate": 4.81196784237895e-05, "loss": 0.6719, "step": 33000 }, { "epoch": 5.18, "learning_rate": 4.8061236678533e-05, "loss": 0.6779, "step": 33500 }, { "epoch": 5.26, "learning_rate": 4.800193717647964e-05, "loss": 0.6842, "step": 34000 }, { "epoch": 5.26, "eval_bleu": 28.6873, "eval_gen_len": 18.515, "eval_loss": 1.174131989479065, "eval_runtime": 856.2355, "eval_samples_per_second": 15.097, "eval_steps_per_second": 0.944, "step": 34000 }, { "epoch": 5.34, "learning_rate": 4.794178212325922e-05, "loss": 0.6763, "step": 34500 }, { "epoch": 5.41, "learning_rate": 4.7880773756323556e-05, "loss": 0.6887, "step": 35000 }, { "epoch": 5.49, "learning_rate": 4.781891434486324e-05, "loss": 0.7097, "step": 35500 }, { "epoch": 5.57, "learning_rate": 4.775620618972326e-05, "loss": 0.7101, "step": 36000 }, { "epoch": 5.57, "eval_bleu": 28.0778, "eval_gen_len": 18.3422, "eval_loss": 1.1875863075256348, "eval_runtime": 848.3505, "eval_samples_per_second": 15.238, "eval_steps_per_second": 0.952, "step": 36000 }, { "epoch": 5.65, "learning_rate": 4.7692651623317395e-05, "loss": 0.7219, "step": 36500 }, { "epoch": 5.72, "learning_rate": 4.762825300954147e-05, "loss": 0.7489, "step": 37000 }, { "epoch": 5.8, "learning_rate": 4.756301274368545e-05, "loss": 0.7671, "step": 37500 }, { "epoch": 5.88, "learning_rate": 4.749693325234434e-05, "loss": 0.7697, "step": 38000 }, { "epoch": 5.88, "eval_bleu": 27.6338, "eval_gen_len": 18.6766, "eval_loss": 1.1897605657577515, "eval_runtime": 865.3805, "eval_samples_per_second": 14.938, "eval_steps_per_second": 0.934, "step": 38000 }, { "epoch": 5.96, "learning_rate": 4.74300169933279e-05, "loss": 0.7846, "step": 38500 }, { "epoch": 6.03, "learning_rate": 4.736226645556926e-05, "loss": 0.6989, "step": 39000 }, { "epoch": 6.11, "learning_rate": 4.729368415903233e-05, "loss": 0.615, "step": 39500 }, { "epoch": 6.19, "learning_rate": 4.722427265461809e-05, "loss": 0.6028, "step": 40000 }, { "epoch": 6.19, "eval_bleu": 28.0713, "eval_gen_len": 18.5903, "eval_loss": 1.2392680644989014, "eval_runtime": 864.5773, "eval_samples_per_second": 14.952, "eval_steps_per_second": 0.935, "step": 40000 }, { "epoch": 6.19, "step": 40000, "total_flos": 1.3870670734394655e+18, "train_loss": 1.0430022468566895, "train_runtime": 46145.4904, "train_samples_per_second": 89.65, "train_steps_per_second": 5.603 } ], "logging_steps": 500, "max_steps": 258560, "num_train_epochs": 40, "save_steps": 2000, "total_flos": 1.3870670734394655e+18, "trial_name": null, "trial_params": null }