{ "best_metric": 0.5060370564460754, "best_model_checkpoint": "t5_results/checkpoint-46875", "epoch": 5.0, "global_step": 46875, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 1.978752e-05, "loss": 0.7612, "step": 500 }, { "epoch": 0.11, "learning_rate": 1.9574613333333336e-05, "loss": 0.67, "step": 1000 }, { "epoch": 0.16, "learning_rate": 1.9361280000000002e-05, "loss": 0.6474, "step": 1500 }, { "epoch": 0.21, "learning_rate": 1.914794666666667e-05, "loss": 0.6322, "step": 2000 }, { "epoch": 0.27, "learning_rate": 1.8934613333333335e-05, "loss": 0.6293, "step": 2500 }, { "epoch": 0.32, "learning_rate": 1.8721706666666667e-05, "loss": 0.6208, "step": 3000 }, { "epoch": 0.37, "learning_rate": 1.8508373333333333e-05, "loss": 0.6115, "step": 3500 }, { "epoch": 0.43, "learning_rate": 1.8295040000000003e-05, "loss": 0.6073, "step": 4000 }, { "epoch": 0.48, "learning_rate": 1.8081706666666666e-05, "loss": 0.6049, "step": 4500 }, { "epoch": 0.53, "learning_rate": 1.7868373333333336e-05, "loss": 0.6014, "step": 5000 }, { "epoch": 0.59, "learning_rate": 1.7655040000000002e-05, "loss": 0.5932, "step": 5500 }, { "epoch": 0.64, "learning_rate": 1.744170666666667e-05, "loss": 0.5947, "step": 6000 }, { "epoch": 0.69, "learning_rate": 1.7228373333333335e-05, "loss": 0.5972, "step": 6500 }, { "epoch": 0.75, "learning_rate": 1.701504e-05, "loss": 0.5875, "step": 7000 }, { "epoch": 0.8, "learning_rate": 1.6801706666666668e-05, "loss": 0.5896, "step": 7500 }, { "epoch": 0.85, "learning_rate": 1.6588373333333335e-05, "loss": 0.5832, "step": 8000 }, { "epoch": 0.91, "learning_rate": 1.637546666666667e-05, "loss": 0.5866, "step": 8500 }, { "epoch": 0.96, "learning_rate": 1.616256e-05, "loss": 0.5815, "step": 9000 }, { "epoch": 1.0, "eval_gen_len": 17.2391, "eval_loss": 0.5298414826393127, "eval_rouge1": 72.5682, "eval_rouge2": 63.0274, "eval_rougeL": 71.9035, "eval_rougeLsum": 71.9355, "eval_runtime": 1951.2025, "eval_samples_per_second": 51.25, "eval_steps_per_second": 3.203, "step": 9375 }, { "epoch": 1.01, "learning_rate": 1.5949226666666667e-05, "loss": 0.5847, "step": 9500 }, { "epoch": 1.07, "learning_rate": 1.5735893333333334e-05, "loss": 0.5753, "step": 10000 }, { "epoch": 1.12, "learning_rate": 1.5522560000000003e-05, "loss": 0.5704, "step": 10500 }, { "epoch": 1.17, "learning_rate": 1.5309226666666666e-05, "loss": 0.5757, "step": 11000 }, { "epoch": 1.23, "learning_rate": 1.5095893333333333e-05, "loss": 0.571, "step": 11500 }, { "epoch": 1.28, "learning_rate": 1.4882560000000001e-05, "loss": 0.5684, "step": 12000 }, { "epoch": 1.33, "learning_rate": 1.4669226666666667e-05, "loss": 0.5713, "step": 12500 }, { "epoch": 1.39, "learning_rate": 1.4455893333333336e-05, "loss": 0.5652, "step": 13000 }, { "epoch": 1.44, "learning_rate": 1.4242986666666667e-05, "loss": 0.5671, "step": 13500 }, { "epoch": 1.49, "learning_rate": 1.4029653333333335e-05, "loss": 0.5646, "step": 14000 }, { "epoch": 1.55, "learning_rate": 1.3817173333333333e-05, "loss": 0.564, "step": 14500 }, { "epoch": 1.6, "learning_rate": 1.3603840000000001e-05, "loss": 0.5628, "step": 15000 }, { "epoch": 1.65, "learning_rate": 1.3390506666666668e-05, "loss": 0.5638, "step": 15500 }, { "epoch": 1.71, "learning_rate": 1.3177173333333336e-05, "loss": 0.5631, "step": 16000 }, { "epoch": 1.76, "learning_rate": 1.296384e-05, "loss": 0.5641, "step": 16500 }, { "epoch": 1.81, "learning_rate": 1.2750933333333335e-05, "loss": 0.5659, "step": 17000 }, { "epoch": 1.87, "learning_rate": 1.25376e-05, "loss": 0.5625, "step": 17500 }, { "epoch": 1.92, "learning_rate": 1.2324266666666666e-05, "loss": 0.5637, "step": 18000 }, { "epoch": 1.97, "learning_rate": 1.2110933333333335e-05, "loss": 0.5626, "step": 18500 }, { "epoch": 2.0, "eval_gen_len": 17.2327, "eval_loss": 0.5176606774330139, "eval_rouge1": 72.7237, "eval_rouge2": 63.3082, "eval_rougeL": 72.0632, "eval_rougeLsum": 72.0935, "eval_runtime": 1949.9946, "eval_samples_per_second": 51.282, "eval_steps_per_second": 3.205, "step": 18750 }, { "epoch": 2.03, "learning_rate": 1.1897600000000001e-05, "loss": 0.5541, "step": 19000 }, { "epoch": 2.08, "learning_rate": 1.1684266666666668e-05, "loss": 0.557, "step": 19500 }, { "epoch": 2.13, "learning_rate": 1.1470933333333334e-05, "loss": 0.5559, "step": 20000 }, { "epoch": 2.19, "learning_rate": 1.1257600000000002e-05, "loss": 0.5577, "step": 20500 }, { "epoch": 2.24, "learning_rate": 1.1044266666666667e-05, "loss": 0.5506, "step": 21000 }, { "epoch": 2.29, "learning_rate": 1.0830933333333333e-05, "loss": 0.5517, "step": 21500 }, { "epoch": 2.35, "learning_rate": 1.0617600000000001e-05, "loss": 0.551, "step": 22000 }, { "epoch": 2.4, "learning_rate": 1.0404266666666666e-05, "loss": 0.5507, "step": 22500 }, { "epoch": 2.45, "learning_rate": 1.0191360000000001e-05, "loss": 0.5501, "step": 23000 }, { "epoch": 2.51, "learning_rate": 9.978026666666667e-06, "loss": 0.552, "step": 23500 }, { "epoch": 2.56, "learning_rate": 9.764693333333334e-06, "loss": 0.5511, "step": 24000 }, { "epoch": 2.61, "learning_rate": 9.55136e-06, "loss": 0.5479, "step": 24500 }, { "epoch": 2.67, "learning_rate": 9.338453333333333e-06, "loss": 0.5522, "step": 25000 }, { "epoch": 2.72, "learning_rate": 9.12512e-06, "loss": 0.5511, "step": 25500 }, { "epoch": 2.77, "learning_rate": 8.912213333333335e-06, "loss": 0.5502, "step": 26000 }, { "epoch": 2.83, "learning_rate": 8.698880000000001e-06, "loss": 0.548, "step": 26500 }, { "epoch": 2.88, "learning_rate": 8.485546666666667e-06, "loss": 0.5513, "step": 27000 }, { "epoch": 2.93, "learning_rate": 8.272213333333334e-06, "loss": 0.5484, "step": 27500 }, { "epoch": 2.99, "learning_rate": 8.05888e-06, "loss": 0.547, "step": 28000 }, { "epoch": 3.0, "eval_gen_len": 17.2248, "eval_loss": 0.5098650455474854, "eval_rouge1": 72.8028, "eval_rouge2": 63.4587, "eval_rougeL": 72.1437, "eval_rougeLsum": 72.1738, "eval_runtime": 1955.4373, "eval_samples_per_second": 51.139, "eval_steps_per_second": 3.196, "step": 28125 }, { "epoch": 3.04, "learning_rate": 7.845546666666667e-06, "loss": 0.5466, "step": 28500 }, { "epoch": 3.09, "learning_rate": 7.632640000000002e-06, "loss": 0.5464, "step": 29000 }, { "epoch": 3.15, "learning_rate": 7.419306666666667e-06, "loss": 0.5484, "step": 29500 }, { "epoch": 3.2, "learning_rate": 7.2059733333333336e-06, "loss": 0.5421, "step": 30000 }, { "epoch": 3.25, "learning_rate": 6.992640000000001e-06, "loss": 0.5425, "step": 30500 }, { "epoch": 3.31, "learning_rate": 6.779306666666667e-06, "loss": 0.5446, "step": 31000 }, { "epoch": 3.36, "learning_rate": 6.565973333333335e-06, "loss": 0.5407, "step": 31500 }, { "epoch": 3.41, "learning_rate": 6.352640000000001e-06, "loss": 0.546, "step": 32000 }, { "epoch": 3.47, "learning_rate": 6.139306666666667e-06, "loss": 0.5415, "step": 32500 }, { "epoch": 3.52, "learning_rate": 5.925973333333334e-06, "loss": 0.5456, "step": 33000 }, { "epoch": 3.57, "learning_rate": 5.71264e-06, "loss": 0.5435, "step": 33500 }, { "epoch": 3.63, "learning_rate": 5.4997333333333335e-06, "loss": 0.5424, "step": 34000 }, { "epoch": 3.68, "learning_rate": 5.2868266666666665e-06, "loss": 0.5439, "step": 34500 }, { "epoch": 3.73, "learning_rate": 5.073493333333334e-06, "loss": 0.5402, "step": 35000 }, { "epoch": 3.79, "learning_rate": 4.86016e-06, "loss": 0.5396, "step": 35500 }, { "epoch": 3.84, "learning_rate": 4.6468266666666675e-06, "loss": 0.5362, "step": 36000 }, { "epoch": 3.89, "learning_rate": 4.4339200000000006e-06, "loss": 0.5459, "step": 36500 }, { "epoch": 3.95, "learning_rate": 4.220586666666667e-06, "loss": 0.5419, "step": 37000 }, { "epoch": 4.0, "learning_rate": 4.0072533333333335e-06, "loss": 0.5432, "step": 37500 }, { "epoch": 4.0, "eval_gen_len": 17.2265, "eval_loss": 0.5071600079536438, "eval_rouge1": 72.831, "eval_rouge2": 63.5122, "eval_rougeL": 72.1737, "eval_rougeLsum": 72.2038, "eval_runtime": 1947.9767, "eval_samples_per_second": 51.335, "eval_steps_per_second": 3.208, "step": 37500 }, { "epoch": 4.05, "learning_rate": 3.7939200000000003e-06, "loss": 0.5406, "step": 38000 }, { "epoch": 4.11, "learning_rate": 3.5810133333333334e-06, "loss": 0.5414, "step": 38500 }, { "epoch": 4.16, "learning_rate": 3.368106666666667e-06, "loss": 0.539, "step": 39000 }, { "epoch": 4.21, "learning_rate": 3.1547733333333337e-06, "loss": 0.5391, "step": 39500 }, { "epoch": 4.27, "learning_rate": 2.9414400000000006e-06, "loss": 0.5385, "step": 40000 }, { "epoch": 4.32, "learning_rate": 2.7281066666666666e-06, "loss": 0.5391, "step": 40500 }, { "epoch": 4.37, "learning_rate": 2.5147733333333335e-06, "loss": 0.5372, "step": 41000 }, { "epoch": 4.43, "learning_rate": 2.30144e-06, "loss": 0.5375, "step": 41500 }, { "epoch": 4.48, "learning_rate": 2.0885333333333334e-06, "loss": 0.5364, "step": 42000 }, { "epoch": 4.53, "learning_rate": 1.8752e-06, "loss": 0.5343, "step": 42500 }, { "epoch": 4.59, "learning_rate": 1.661866666666667e-06, "loss": 0.5386, "step": 43000 }, { "epoch": 4.64, "learning_rate": 1.4485333333333334e-06, "loss": 0.5362, "step": 43500 }, { "epoch": 4.69, "learning_rate": 1.2352e-06, "loss": 0.5385, "step": 44000 }, { "epoch": 4.75, "learning_rate": 1.0218666666666667e-06, "loss": 0.5411, "step": 44500 }, { "epoch": 4.8, "learning_rate": 8.085333333333334e-07, "loss": 0.537, "step": 45000 }, { "epoch": 4.85, "learning_rate": 5.952e-07, "loss": 0.5378, "step": 45500 }, { "epoch": 4.91, "learning_rate": 3.818666666666667e-07, "loss": 0.5407, "step": 46000 }, { "epoch": 4.96, "learning_rate": 1.6896000000000003e-07, "loss": 0.54, "step": 46500 }, { "epoch": 5.0, "eval_gen_len": 17.2227, "eval_loss": 0.5060370564460754, "eval_rouge1": 72.8556, "eval_rouge2": 63.5466, "eval_rougeL": 72.1975, "eval_rougeLsum": 72.228, "eval_runtime": 1951.1899, "eval_samples_per_second": 51.251, "eval_steps_per_second": 3.203, "step": 46875 } ], "max_steps": 46875, "num_train_epochs": 5, "total_flos": 3.315684370415616e+17, "trial_name": null, "trial_params": null }