{ "best_metric": 0.8995155096054077, "best_model_checkpoint": "./mbartLarge_koja_37p/checkpoint-33000", "epoch": 2.9064646820503786, "eval_steps": 5500, "global_step": 33000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 1.6666666666666667e-05, "loss": 3.5939, "step": 500 }, { "epoch": 0.09, "learning_rate": 3.3333333333333335e-05, "loss": 3.0254, "step": 1000 }, { "epoch": 0.13, "learning_rate": 5e-05, "loss": 1.9133, "step": 1500 }, { "epoch": 0.18, "learning_rate": 4.9936851145520225e-05, "loss": 1.672, "step": 2000 }, { "epoch": 0.22, "learning_rate": 4.987370229104044e-05, "loss": 1.56, "step": 2500 }, { "epoch": 0.26, "learning_rate": 4.9810553436560664e-05, "loss": 1.4874, "step": 3000 }, { "epoch": 0.31, "learning_rate": 4.974740458208088e-05, "loss": 1.4345, "step": 3500 }, { "epoch": 0.35, "learning_rate": 4.96842557276011e-05, "loss": 1.3676, "step": 4000 }, { "epoch": 0.4, "learning_rate": 4.9621106873121326e-05, "loss": 1.3183, "step": 4500 }, { "epoch": 0.44, "learning_rate": 4.955795801864154e-05, "loss": 1.2987, "step": 5000 }, { "epoch": 0.48, "learning_rate": 4.9494809164161765e-05, "loss": 1.2712, "step": 5500 }, { "epoch": 0.48, "eval_bleu": 3.0858, "eval_gen_len": 18.1085, "eval_loss": 1.2203737497329712, "eval_runtime": 1528.8304, "eval_samples_per_second": 14.853, "eval_steps_per_second": 0.929, "step": 5500 }, { "epoch": 0.53, "learning_rate": 4.943166030968199e-05, "loss": 1.2405, "step": 6000 }, { "epoch": 0.57, "learning_rate": 4.9368511455202204e-05, "loss": 1.2262, "step": 6500 }, { "epoch": 0.62, "learning_rate": 4.930536260072243e-05, "loss": 1.1959, "step": 7000 }, { "epoch": 0.66, "learning_rate": 4.924221374624264e-05, "loss": 1.182, "step": 7500 }, { "epoch": 0.7, "learning_rate": 4.9179064891762866e-05, "loss": 1.1683, "step": 8000 }, { "epoch": 0.75, "learning_rate": 4.911591603728309e-05, "loss": 1.1489, "step": 8500 }, { "epoch": 0.79, "learning_rate": 4.9052767182803305e-05, "loss": 1.1365, "step": 9000 }, { "epoch": 0.84, "learning_rate": 4.898961832832353e-05, "loss": 1.1313, "step": 9500 }, { "epoch": 0.88, "learning_rate": 4.8926469473843744e-05, "loss": 1.1172, "step": 10000 }, { "epoch": 0.92, "learning_rate": 4.886332061936397e-05, "loss": 1.1123, "step": 10500 }, { "epoch": 0.97, "learning_rate": 4.880017176488419e-05, "loss": 1.0946, "step": 11000 }, { "epoch": 0.97, "eval_bleu": 3.3162, "eval_gen_len": 17.8651, "eval_loss": 1.0577712059020996, "eval_runtime": 1538.4057, "eval_samples_per_second": 14.76, "eval_steps_per_second": 0.923, "step": 11000 }, { "epoch": 1.01, "learning_rate": 4.8737022910404406e-05, "loss": 1.0856, "step": 11500 }, { "epoch": 1.06, "learning_rate": 4.867387405592463e-05, "loss": 1.0716, "step": 12000 }, { "epoch": 1.1, "learning_rate": 4.8610725201444845e-05, "loss": 1.0736, "step": 12500 }, { "epoch": 1.14, "learning_rate": 4.854757634696507e-05, "loss": 1.0367, "step": 13000 }, { "epoch": 1.19, "learning_rate": 4.848442749248529e-05, "loss": 1.0204, "step": 13500 }, { "epoch": 1.23, "learning_rate": 4.842127863800551e-05, "loss": 1.0533, "step": 14000 }, { "epoch": 1.28, "learning_rate": 4.835812978352573e-05, "loss": 1.0426, "step": 14500 }, { "epoch": 1.32, "learning_rate": 4.829498092904595e-05, "loss": 1.0261, "step": 15000 }, { "epoch": 1.37, "learning_rate": 4.823183207456617e-05, "loss": 1.0111, "step": 15500 }, { "epoch": 1.41, "learning_rate": 4.816868322008639e-05, "loss": 0.9862, "step": 16000 }, { "epoch": 1.45, "learning_rate": 4.810553436560661e-05, "loss": 0.9546, "step": 16500 }, { "epoch": 1.45, "eval_bleu": 5.6024, "eval_gen_len": 17.902, "eval_loss": 0.9687988758087158, "eval_runtime": 1530.7999, "eval_samples_per_second": 14.833, "eval_steps_per_second": 0.928, "step": 16500 }, { "epoch": 1.5, "learning_rate": 4.804238551112683e-05, "loss": 0.9642, "step": 17000 }, { "epoch": 1.54, "learning_rate": 4.7979236656647054e-05, "loss": 0.9364, "step": 17500 }, { "epoch": 1.59, "learning_rate": 4.791608780216727e-05, "loss": 0.931, "step": 18000 }, { "epoch": 1.63, "learning_rate": 4.785293894768749e-05, "loss": 0.915, "step": 18500 }, { "epoch": 1.67, "learning_rate": 4.778979009320771e-05, "loss": 0.9043, "step": 19000 }, { "epoch": 1.72, "learning_rate": 4.772664123872793e-05, "loss": 0.9158, "step": 19500 }, { "epoch": 1.76, "learning_rate": 4.7663492384248155e-05, "loss": 0.9068, "step": 20000 }, { "epoch": 1.81, "learning_rate": 4.760034352976837e-05, "loss": 0.9314, "step": 20500 }, { "epoch": 1.85, "learning_rate": 4.7537194675288594e-05, "loss": 0.9112, "step": 21000 }, { "epoch": 1.89, "learning_rate": 4.747404582080881e-05, "loss": 0.8862, "step": 21500 }, { "epoch": 1.94, "learning_rate": 4.741089696632903e-05, "loss": 0.89, "step": 22000 }, { "epoch": 1.94, "eval_bleu": 5.1453, "eval_gen_len": 17.6144, "eval_loss": 0.941440761089325, "eval_runtime": 1490.4507, "eval_samples_per_second": 15.235, "eval_steps_per_second": 0.953, "step": 22000 }, { "epoch": 1.98, "learning_rate": 4.7347748111849256e-05, "loss": 0.8726, "step": 22500 }, { "epoch": 2.03, "learning_rate": 4.728459925736947e-05, "loss": 0.8749, "step": 23000 }, { "epoch": 2.07, "learning_rate": 4.7221450402889695e-05, "loss": 0.858, "step": 23500 }, { "epoch": 2.11, "learning_rate": 4.715830154840992e-05, "loss": 0.8479, "step": 24000 }, { "epoch": 2.16, "learning_rate": 4.7095152693930134e-05, "loss": 0.8394, "step": 24500 }, { "epoch": 2.2, "learning_rate": 4.703200383945036e-05, "loss": 0.8472, "step": 25000 }, { "epoch": 2.25, "learning_rate": 4.696885498497057e-05, "loss": 0.7793, "step": 25500 }, { "epoch": 2.29, "learning_rate": 4.6905706130490796e-05, "loss": 0.7753, "step": 26000 }, { "epoch": 2.33, "learning_rate": 4.684255727601102e-05, "loss": 0.7655, "step": 26500 }, { "epoch": 2.38, "learning_rate": 4.6779408421531235e-05, "loss": 0.7666, "step": 27000 }, { "epoch": 2.42, "learning_rate": 4.671625956705146e-05, "loss": 0.834, "step": 27500 }, { "epoch": 2.42, "eval_bleu": 5.3985, "eval_gen_len": 17.6899, "eval_loss": 0.9212985038757324, "eval_runtime": 1489.7985, "eval_samples_per_second": 15.242, "eval_steps_per_second": 0.953, "step": 27500 }, { "epoch": 2.47, "learning_rate": 4.6653110712571674e-05, "loss": 0.8098, "step": 28000 }, { "epoch": 2.51, "learning_rate": 4.65899618580919e-05, "loss": 0.8153, "step": 28500 }, { "epoch": 2.55, "learning_rate": 4.652681300361212e-05, "loss": 0.7837, "step": 29000 }, { "epoch": 2.6, "learning_rate": 4.6463664149132336e-05, "loss": 0.8255, "step": 29500 }, { "epoch": 2.64, "learning_rate": 4.640051529465256e-05, "loss": 0.8345, "step": 30000 }, { "epoch": 2.69, "learning_rate": 4.6337366440172775e-05, "loss": 0.8061, "step": 30500 }, { "epoch": 2.73, "learning_rate": 4.6274217585693e-05, "loss": 0.7743, "step": 31000 }, { "epoch": 2.77, "learning_rate": 4.621106873121322e-05, "loss": 0.7759, "step": 31500 }, { "epoch": 2.82, "learning_rate": 4.614791987673344e-05, "loss": 0.7777, "step": 32000 }, { "epoch": 2.86, "learning_rate": 4.608477102225366e-05, "loss": 0.7537, "step": 32500 }, { "epoch": 2.91, "learning_rate": 4.602162216777388e-05, "loss": 0.7439, "step": 33000 }, { "epoch": 2.91, "eval_bleu": 6.2934, "eval_gen_len": 17.4862, "eval_loss": 0.8995155096054077, "eval_runtime": 1459.4956, "eval_samples_per_second": 15.558, "eval_steps_per_second": 0.973, "step": 33000 } ], "logging_steps": 500, "max_steps": 397390, "num_train_epochs": 35, "save_steps": 5500, "total_flos": 1.1443832137753559e+18, "trial_name": null, "trial_params": null }