{ "best_metric": 2.0733225345611572, "best_model_checkpoint": "./checkpoints/barthez-deft-archeologie/checkpoint-1080", "epoch": 20.0, "global_step": 2160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.93, "learning_rate": 2.8694444444444447e-05, "loss": 3.4832, "step": 100 }, { "epoch": 1.0, "eval_gen_len": 15.8333, "eval_loss": 2.4237029552459717, "eval_rouge1": 22.6662, "eval_rouge2": 10.009, "eval_rougeL": 19.8729, "eval_rougeLsum": 19.8814, "eval_runtime": 3.8237, "eval_samples_per_second": 18.83, "eval_steps_per_second": 4.707, "step": 108 }, { "epoch": 1.85, "learning_rate": 2.7305555555555556e-05, "loss": 2.557, "step": 200 }, { "epoch": 2.0, "eval_gen_len": 19.0139, "eval_loss": 2.232834577560425, "eval_rouge1": 24.8102, "eval_rouge2": 11.9911, "eval_rougeL": 20.4773, "eval_rougeLsum": 20.696, "eval_runtime": 3.7488, "eval_samples_per_second": 19.206, "eval_steps_per_second": 4.802, "step": 216 }, { "epoch": 2.78, "learning_rate": 2.591666666666667e-05, "loss": 2.2702, "step": 300 }, { "epoch": 3.0, "eval_gen_len": 18.1944, "eval_loss": 2.2001593112945557, "eval_rouge1": 25.6482, "eval_rouge2": 11.6191, "eval_rougeL": 21.8383, "eval_rougeLsum": 21.9341, "eval_runtime": 3.6323, "eval_samples_per_second": 19.822, "eval_steps_per_second": 4.956, "step": 324 }, { "epoch": 3.7, "learning_rate": 2.4527777777777778e-05, "loss": 2.1119, "step": 400 }, { "epoch": 4.0, "eval_gen_len": 19.4306, "eval_loss": 2.126582145690918, "eval_rouge1": 25.5806, "eval_rouge2": 11.9765, "eval_rougeL": 21.3973, "eval_rougeLsum": 21.3503, "eval_runtime": 3.6876, "eval_samples_per_second": 19.525, "eval_steps_per_second": 4.881, "step": 432 }, { "epoch": 4.63, "learning_rate": 2.3138888888888887e-05, "loss": 1.9582, "step": 500 }, { "epoch": 5.0, "eval_gen_len": 19.1528, "eval_loss": 2.107192039489746, "eval_rouge1": 25.6578, "eval_rouge2": 12.2709, "eval_rougeL": 22.182, "eval_rougeLsum": 22.0548, "eval_runtime": 3.788, "eval_samples_per_second": 19.007, "eval_steps_per_second": 4.752, "step": 540 }, { "epoch": 5.56, "learning_rate": 2.175e-05, "loss": 1.8137, "step": 600 }, { "epoch": 6.0, "eval_gen_len": 19.4722, "eval_loss": 2.100757122039795, "eval_rouge1": 26.5272, "eval_rouge2": 11.4033, "eval_rougeL": 22.359, "eval_rougeLsum": 22.3259, "eval_runtime": 3.6704, "eval_samples_per_second": 19.617, "eval_steps_per_second": 4.904, "step": 648 }, { "epoch": 6.48, "learning_rate": 2.0361111111111113e-05, "loss": 1.7725, "step": 700 }, { "epoch": 7.0, "eval_gen_len": 19.1806, "eval_loss": 2.107440948486328, "eval_rouge1": 25.0405, "eval_rouge2": 11.1773, "eval_rougeL": 21.1369, "eval_rougeLsum": 21.1847, "eval_runtime": 3.6936, "eval_samples_per_second": 19.493, "eval_steps_per_second": 4.873, "step": 756 }, { "epoch": 7.41, "learning_rate": 1.8972222222222222e-05, "loss": 1.6772, "step": 800 }, { "epoch": 8.0, "eval_gen_len": 19.3333, "eval_loss": 2.0958752632141113, "eval_rouge1": 26.5237, "eval_rouge2": 11.6028, "eval_rougeL": 22.5018, "eval_rougeLsum": 22.3931, "eval_runtime": 3.6558, "eval_samples_per_second": 19.695, "eval_steps_per_second": 4.924, "step": 864 }, { "epoch": 8.33, "learning_rate": 1.7583333333333335e-05, "loss": 1.5798, "step": 900 }, { "epoch": 9.0, "eval_gen_len": 19.7222, "eval_loss": 2.097611904144287, "eval_rouge1": 27.7443, "eval_rouge2": 11.9898, "eval_rougeL": 22.4052, "eval_rougeLsum": 22.2954, "eval_runtime": 3.7405, "eval_samples_per_second": 19.249, "eval_steps_per_second": 4.812, "step": 972 }, { "epoch": 9.26, "learning_rate": 1.6194444444444448e-05, "loss": 1.4753, "step": 1000 }, { "epoch": 10.0, "eval_gen_len": 19.8194, "eval_loss": 2.0733225345611572, "eval_rouge1": 28.3502, "eval_rouge2": 12.9162, "eval_rougeL": 22.6352, "eval_rougeLsum": 22.6015, "eval_runtime": 3.6963, "eval_samples_per_second": 19.479, "eval_steps_per_second": 4.87, "step": 1080 }, { "epoch": 10.19, "learning_rate": 1.4819444444444445e-05, "loss": 1.4646, "step": 1100 }, { "epoch": 11.0, "eval_gen_len": 19.6111, "eval_loss": 2.109070062637329, "eval_rouge1": 27.9198, "eval_rouge2": 12.8591, "eval_rougeL": 23.0718, "eval_rougeLsum": 23.0779, "eval_runtime": 3.7289, "eval_samples_per_second": 19.308, "eval_steps_per_second": 4.827, "step": 1188 }, { "epoch": 11.11, "learning_rate": 1.3430555555555556e-05, "loss": 1.4082, "step": 1200 }, { "epoch": 12.0, "eval_gen_len": 19.4861, "eval_loss": 2.103557586669922, "eval_rouge1": 28.8509, "eval_rouge2": 13.0987, "eval_rougeL": 23.4189, "eval_rougeLsum": 23.5044, "eval_runtime": 3.6418, "eval_samples_per_second": 19.771, "eval_steps_per_second": 4.943, "step": 1296 }, { "epoch": 12.04, "learning_rate": 1.2041666666666667e-05, "loss": 1.3951, "step": 1300 }, { "epoch": 12.96, "learning_rate": 1.0652777777777778e-05, "loss": 1.2862, "step": 1400 }, { "epoch": 13.0, "eval_gen_len": 19.8611, "eval_loss": 2.122204065322876, "eval_rouge1": 28.6641, "eval_rouge2": 12.8157, "eval_rougeL": 22.6799, "eval_rougeLsum": 22.7051, "eval_runtime": 3.6343, "eval_samples_per_second": 19.811, "eval_steps_per_second": 4.953, "step": 1404 }, { "epoch": 13.89, "learning_rate": 9.263888888888889e-06, "loss": 1.2612, "step": 1500 }, { "epoch": 14.0, "eval_gen_len": 19.875, "eval_loss": 2.14872407913208, "eval_rouge1": 26.9709, "eval_rouge2": 11.6084, "eval_rougeL": 22.0312, "eval_rougeLsum": 22.0543, "eval_runtime": 3.7019, "eval_samples_per_second": 19.45, "eval_steps_per_second": 4.862, "step": 1512 }, { "epoch": 14.81, "learning_rate": 7.875e-06, "loss": 1.2327, "step": 1600 }, { "epoch": 15.0, "eval_gen_len": 19.7361, "eval_loss": 2.180756092071533, "eval_rouge1": 28.218, "eval_rouge2": 12.6239, "eval_rougeL": 22.7372, "eval_rougeLsum": 22.7881, "eval_runtime": 3.6942, "eval_samples_per_second": 19.49, "eval_steps_per_second": 4.872, "step": 1620 }, { "epoch": 15.74, "learning_rate": 6.486111111111111e-06, "loss": 1.2264, "step": 1700 }, { "epoch": 16.0, "eval_gen_len": 19.7639, "eval_loss": 2.177760124206543, "eval_rouge1": 26.7393, "eval_rouge2": 11.4474, "eval_rougeL": 21.6057, "eval_rougeLsum": 21.555, "eval_runtime": 3.7636, "eval_samples_per_second": 19.131, "eval_steps_per_second": 4.783, "step": 1728 }, { "epoch": 16.67, "learning_rate": 5.097222222222222e-06, "loss": 1.1848, "step": 1800 }, { "epoch": 17.0, "eval_gen_len": 19.6806, "eval_loss": 2.1995341777801514, "eval_rouge1": 27.6902, "eval_rouge2": 12.1082, "eval_rougeL": 22.0406, "eval_rougeLsum": 22.0101, "eval_runtime": 3.6502, "eval_samples_per_second": 19.725, "eval_steps_per_second": 4.931, "step": 1836 }, { "epoch": 17.59, "learning_rate": 3.7083333333333335e-06, "loss": 1.133, "step": 1900 }, { "epoch": 18.0, "eval_gen_len": 19.8056, "eval_loss": 2.2037861347198486, "eval_rouge1": 27.0402, "eval_rouge2": 12.1846, "eval_rougeL": 21.7793, "eval_rougeLsum": 21.7513, "eval_runtime": 3.6766, "eval_samples_per_second": 19.583, "eval_steps_per_second": 4.896, "step": 1944 }, { "epoch": 18.52, "learning_rate": 2.319444444444444e-06, "loss": 1.168, "step": 2000 }, { "epoch": 19.0, "eval_gen_len": 19.7222, "eval_loss": 2.2116100788116455, "eval_rouge1": 27.5149, "eval_rouge2": 11.9876, "eval_rougeL": 22.1113, "eval_rougeLsum": 22.1527, "eval_runtime": 3.6273, "eval_samples_per_second": 19.849, "eval_steps_per_second": 4.962, "step": 2052 }, { "epoch": 19.44, "learning_rate": 9.305555555555555e-07, "loss": 1.1206, "step": 2100 }, { "epoch": 20.0, "eval_gen_len": 19.5972, "eval_loss": 2.213322639465332, "eval_rouge1": 28.2321, "eval_rouge2": 12.677, "eval_rougeL": 22.749, "eval_rougeLsum": 22.8485, "eval_runtime": 3.6293, "eval_samples_per_second": 19.839, "eval_steps_per_second": 4.96, "step": 2160 }, { "epoch": 20.0, "step": 2160, "total_flos": 2429194384871424.0, "train_loss": 1.6326574502167879, "train_runtime": 371.3395, "train_samples_per_second": 23.213, "train_steps_per_second": 5.817 } ], "max_steps": 2160, "num_train_epochs": 20, "total_flos": 2429194384871424.0, "trial_name": null, "trial_params": null }