{ "best_metric": 26.2764, "best_model_checkpoint": "./ko-en_mbartLarge_exp5p_linear/checkpoint-15000", "epoch": 8.352668213457077, "eval_steps": 1000, "global_step": 18000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23, "learning_rate": 5e-05, "loss": 1.9159, "step": 500 }, { "epoch": 0.46, "learning_rate": 4.970828471411903e-05, "loss": 1.7665, "step": 1000 }, { "epoch": 0.46, "eval_bleu": 17.6773, "eval_gen_len": 18.7196, "eval_loss": 1.6563962697982788, "eval_runtime": 295.5881, "eval_samples_per_second": 14.574, "eval_steps_per_second": 0.913, "step": 1000 }, { "epoch": 0.7, "learning_rate": 4.941656942823804e-05, "loss": 1.6536, "step": 1500 }, { "epoch": 0.93, "learning_rate": 4.912485414235706e-05, "loss": 1.5688, "step": 2000 }, { "epoch": 0.93, "eval_bleu": 20.8837, "eval_gen_len": 18.3983, "eval_loss": 1.493910789489746, "eval_runtime": 286.1303, "eval_samples_per_second": 15.056, "eval_steps_per_second": 0.944, "step": 2000 }, { "epoch": 1.16, "learning_rate": 4.883313885647608e-05, "loss": 1.4925, "step": 2500 }, { "epoch": 1.39, "learning_rate": 4.8541423570595104e-05, "loss": 1.457, "step": 3000 }, { "epoch": 1.39, "eval_bleu": 21.9168, "eval_gen_len": 18.458, "eval_loss": 1.4349554777145386, "eval_runtime": 286.2559, "eval_samples_per_second": 15.049, "eval_steps_per_second": 0.943, "step": 3000 }, { "epoch": 1.62, "learning_rate": 4.824970828471412e-05, "loss": 1.4164, "step": 3500 }, { "epoch": 1.86, "learning_rate": 4.795799299883314e-05, "loss": 1.4107, "step": 4000 }, { "epoch": 1.86, "eval_bleu": 22.8881, "eval_gen_len": 18.4826, "eval_loss": 1.3751940727233887, "eval_runtime": 290.8825, "eval_samples_per_second": 14.81, "eval_steps_per_second": 0.928, "step": 4000 }, { "epoch": 2.09, "learning_rate": 4.7666277712952163e-05, "loss": 1.3584, "step": 4500 }, { "epoch": 2.32, "learning_rate": 4.737456242707118e-05, "loss": 1.3039, "step": 5000 }, { "epoch": 2.32, "eval_bleu": 23.8115, "eval_gen_len": 18.4348, "eval_loss": 1.332729458808899, "eval_runtime": 285.9035, "eval_samples_per_second": 15.068, "eval_steps_per_second": 0.944, "step": 5000 }, { "epoch": 2.55, "learning_rate": 4.70828471411902e-05, "loss": 1.2963, "step": 5500 }, { "epoch": 2.78, "learning_rate": 4.679113185530922e-05, "loss": 1.282, "step": 6000 }, { "epoch": 2.78, "eval_bleu": 24.235, "eval_gen_len": 18.3561, "eval_loss": 1.3079338073730469, "eval_runtime": 284.0397, "eval_samples_per_second": 15.167, "eval_steps_per_second": 0.951, "step": 6000 }, { "epoch": 3.02, "learning_rate": 4.649941656942824e-05, "loss": 1.2761, "step": 6500 }, { "epoch": 3.25, "learning_rate": 4.620770128354726e-05, "loss": 1.2133, "step": 7000 }, { "epoch": 3.25, "eval_bleu": 24.8877, "eval_gen_len": 18.5204, "eval_loss": 1.2819560766220093, "eval_runtime": 284.2962, "eval_samples_per_second": 15.153, "eval_steps_per_second": 0.95, "step": 7000 }, { "epoch": 3.48, "learning_rate": 4.5915985997666276e-05, "loss": 1.1875, "step": 7500 }, { "epoch": 3.71, "learning_rate": 4.56242707117853e-05, "loss": 1.1787, "step": 8000 }, { "epoch": 3.71, "eval_bleu": 25.2719, "eval_gen_len": 18.415, "eval_loss": 1.2579996585845947, "eval_runtime": 283.3691, "eval_samples_per_second": 15.203, "eval_steps_per_second": 0.953, "step": 8000 }, { "epoch": 3.94, "learning_rate": 4.5332555425904324e-05, "loss": 1.175, "step": 8500 }, { "epoch": 4.18, "learning_rate": 4.5040840140023335e-05, "loss": 1.1154, "step": 9000 }, { "epoch": 4.18, "eval_bleu": 25.5507, "eval_gen_len": 18.3528, "eval_loss": 1.2543020248413086, "eval_runtime": 282.3342, "eval_samples_per_second": 15.259, "eval_steps_per_second": 0.956, "step": 9000 }, { "epoch": 4.41, "learning_rate": 4.474912485414236e-05, "loss": 1.0893, "step": 9500 }, { "epoch": 4.64, "learning_rate": 4.445740956826138e-05, "loss": 1.0956, "step": 10000 }, { "epoch": 4.64, "eval_bleu": 25.7284, "eval_gen_len": 18.5348, "eval_loss": 1.2415348291397095, "eval_runtime": 283.6466, "eval_samples_per_second": 15.188, "eval_steps_per_second": 0.952, "step": 10000 }, { "epoch": 4.87, "learning_rate": 4.41656942823804e-05, "loss": 1.0869, "step": 10500 }, { "epoch": 5.1, "learning_rate": 4.387397899649942e-05, "loss": 1.023, "step": 11000 }, { "epoch": 5.1, "eval_bleu": 25.7912, "eval_gen_len": 18.3347, "eval_loss": 1.2409833669662476, "eval_runtime": 281.8959, "eval_samples_per_second": 15.282, "eval_steps_per_second": 0.958, "step": 11000 }, { "epoch": 5.34, "learning_rate": 4.358226371061844e-05, "loss": 0.9568, "step": 11500 }, { "epoch": 5.57, "learning_rate": 4.329054842473746e-05, "loss": 0.95, "step": 12000 }, { "epoch": 5.57, "eval_bleu": 25.9921, "eval_gen_len": 18.2593, "eval_loss": 1.2326716184616089, "eval_runtime": 280.949, "eval_samples_per_second": 15.334, "eval_steps_per_second": 0.961, "step": 12000 }, { "epoch": 5.8, "learning_rate": 4.299883313885648e-05, "loss": 0.9611, "step": 12500 }, { "epoch": 6.03, "learning_rate": 4.2707117852975496e-05, "loss": 0.9476, "step": 13000 }, { "epoch": 6.03, "eval_bleu": 25.829, "eval_gen_len": 18.3686, "eval_loss": 1.2631200551986694, "eval_runtime": 282.7475, "eval_samples_per_second": 15.236, "eval_steps_per_second": 0.955, "step": 13000 }, { "epoch": 6.26, "learning_rate": 4.241540256709452e-05, "loss": 0.887, "step": 13500 }, { "epoch": 6.5, "learning_rate": 4.212368728121354e-05, "loss": 0.9061, "step": 14000 }, { "epoch": 6.5, "eval_bleu": 25.8316, "eval_gen_len": 18.7481, "eval_loss": 1.254765272140503, "eval_runtime": 286.2873, "eval_samples_per_second": 15.048, "eval_steps_per_second": 0.943, "step": 14000 }, { "epoch": 6.73, "learning_rate": 4.1831971995332556e-05, "loss": 0.8963, "step": 14500 }, { "epoch": 6.96, "learning_rate": 4.154025670945157e-05, "loss": 0.9037, "step": 15000 }, { "epoch": 6.96, "eval_bleu": 26.2764, "eval_gen_len": 18.3888, "eval_loss": 1.2307679653167725, "eval_runtime": 281.3307, "eval_samples_per_second": 15.313, "eval_steps_per_second": 0.96, "step": 15000 }, { "epoch": 7.19, "learning_rate": 4.12485414235706e-05, "loss": 0.7613, "step": 15500 }, { "epoch": 7.42, "learning_rate": 4.095682613768962e-05, "loss": 0.7431, "step": 16000 }, { "epoch": 7.42, "eval_bleu": 25.9268, "eval_gen_len": 18.256, "eval_loss": 1.2716145515441895, "eval_runtime": 279.8965, "eval_samples_per_second": 15.391, "eval_steps_per_second": 0.965, "step": 16000 }, { "epoch": 7.66, "learning_rate": 4.066511085180863e-05, "loss": 0.7538, "step": 16500 }, { "epoch": 7.89, "learning_rate": 4.037339556592766e-05, "loss": 0.7526, "step": 17000 }, { "epoch": 7.89, "eval_bleu": 25.9883, "eval_gen_len": 18.2052, "eval_loss": 1.265455722808838, "eval_runtime": 280.4684, "eval_samples_per_second": 15.36, "eval_steps_per_second": 0.963, "step": 17000 }, { "epoch": 8.12, "learning_rate": 4.0081680280046675e-05, "loss": 0.7074, "step": 17500 }, { "epoch": 8.35, "learning_rate": 3.97899649941657e-05, "loss": 0.6654, "step": 18000 }, { "epoch": 8.35, "eval_bleu": 25.6866, "eval_gen_len": 18.2217, "eval_loss": 1.311843991279602, "eval_runtime": 281.3073, "eval_samples_per_second": 15.314, "eval_steps_per_second": 0.96, "step": 18000 } ], "logging_steps": 500, "max_steps": 86200, "num_train_epochs": 40, "save_steps": 1000, "total_flos": 6.243067438154383e+17, "trial_name": null, "trial_params": null }