{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "global_step": 33000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23, "learning_rate": 1.969939393939394e-05, "loss": 2.4249, "step": 500 }, { "epoch": 0.45, "learning_rate": 1.9396363636363637e-05, "loss": 2.3742, "step": 1000 }, { "epoch": 0.68, "learning_rate": 1.9093939393939395e-05, "loss": 2.3454, "step": 1500 }, { "epoch": 0.91, "learning_rate": 1.8790909090909093e-05, "loss": 2.3292, "step": 2000 }, { "epoch": 1.0, "eval_loss": 2.4535229206085205, "eval_runtime": 1.8377, "eval_samples_per_second": 136.041, "eval_steps_per_second": 22.855, "step": 2200 }, { "epoch": 1.14, "learning_rate": 1.848787878787879e-05, "loss": 2.1317, "step": 2500 }, { "epoch": 1.36, "learning_rate": 1.8184848484848487e-05, "loss": 2.0183, "step": 3000 }, { "epoch": 1.59, "learning_rate": 1.788181818181818e-05, "loss": 2.0052, "step": 3500 }, { "epoch": 1.82, "learning_rate": 1.757878787878788e-05, "loss": 1.9885, "step": 4000 }, { "epoch": 2.0, "eval_loss": 2.4263100624084473, "eval_runtime": 1.8704, "eval_samples_per_second": 133.662, "eval_steps_per_second": 22.455, "step": 4400 }, { "epoch": 2.05, "learning_rate": 1.727575757575758e-05, "loss": 1.9505, "step": 4500 }, { "epoch": 2.27, "learning_rate": 1.6972727272727273e-05, "loss": 1.7025, "step": 5000 }, { "epoch": 2.5, "learning_rate": 1.6669696969696972e-05, "loss": 1.717, "step": 5500 }, { "epoch": 2.73, "learning_rate": 1.636727272727273e-05, "loss": 1.7028, "step": 6000 }, { "epoch": 2.95, "learning_rate": 1.6064242424242428e-05, "loss": 1.741, "step": 6500 }, { "epoch": 3.0, "eval_loss": 2.508474111557007, "eval_runtime": 1.8606, "eval_samples_per_second": 134.366, "eval_steps_per_second": 22.573, "step": 6600 }, { "epoch": 3.18, "learning_rate": 1.5761212121212123e-05, "loss": 1.4946, "step": 7000 }, { "epoch": 3.41, "learning_rate": 1.5458181818181818e-05, "loss": 1.4504, "step": 7500 }, { "epoch": 3.64, "learning_rate": 1.5155151515151516e-05, "loss": 1.4769, "step": 8000 }, { "epoch": 3.86, "learning_rate": 1.4852121212121213e-05, "loss": 1.4818, "step": 8500 }, { "epoch": 4.0, "eval_loss": 2.5952579975128174, "eval_runtime": 1.911, "eval_samples_per_second": 130.825, "eval_steps_per_second": 21.979, "step": 8800 }, { "epoch": 4.09, "learning_rate": 1.4549090909090911e-05, "loss": 1.3898, "step": 9000 }, { "epoch": 4.32, "learning_rate": 1.4246666666666669e-05, "loss": 1.218, "step": 9500 }, { "epoch": 4.55, "learning_rate": 1.3943636363636365e-05, "loss": 1.2481, "step": 10000 }, { "epoch": 4.77, "learning_rate": 1.364060606060606e-05, "loss": 1.2518, "step": 10500 }, { "epoch": 5.0, "learning_rate": 1.3337575757575759e-05, "loss": 1.2692, "step": 11000 }, { "epoch": 5.0, "eval_loss": 2.763364791870117, "eval_runtime": 1.8787, "eval_samples_per_second": 133.072, "eval_steps_per_second": 22.356, "step": 11000 }, { "epoch": 5.23, "learning_rate": 1.3035151515151516e-05, "loss": 1.0289, "step": 11500 }, { "epoch": 5.45, "learning_rate": 1.2732727272727275e-05, "loss": 1.0458, "step": 12000 }, { "epoch": 5.68, "learning_rate": 1.2429696969696972e-05, "loss": 1.0494, "step": 12500 }, { "epoch": 5.91, "learning_rate": 1.2126666666666667e-05, "loss": 1.057, "step": 13000 }, { "epoch": 6.0, "eval_loss": 2.8617300987243652, "eval_runtime": 1.8519, "eval_samples_per_second": 135.0, "eval_steps_per_second": 22.68, "step": 13200 }, { "epoch": 6.14, "learning_rate": 1.1823636363636364e-05, "loss": 0.9342, "step": 13500 }, { "epoch": 6.36, "learning_rate": 1.152060606060606e-05, "loss": 0.8533, "step": 14000 }, { "epoch": 6.59, "learning_rate": 1.1217575757575759e-05, "loss": 0.8949, "step": 14500 }, { "epoch": 6.82, "learning_rate": 1.0914545454545456e-05, "loss": 0.8928, "step": 15000 }, { "epoch": 7.0, "eval_loss": 3.067075729370117, "eval_runtime": 1.8518, "eval_samples_per_second": 135.007, "eval_steps_per_second": 22.681, "step": 15400 }, { "epoch": 7.05, "learning_rate": 1.0611515151515152e-05, "loss": 0.8587, "step": 15500 }, { "epoch": 7.27, "learning_rate": 1.030848484848485e-05, "loss": 0.7187, "step": 16000 }, { "epoch": 7.5, "learning_rate": 1.0006060606060606e-05, "loss": 0.7212, "step": 16500 }, { "epoch": 7.73, "learning_rate": 9.703030303030305e-06, "loss": 0.7395, "step": 17000 }, { "epoch": 7.95, "learning_rate": 9.4e-06, "loss": 0.758, "step": 17500 }, { "epoch": 8.0, "eval_loss": 3.21189546585083, "eval_runtime": 1.8755, "eval_samples_per_second": 133.301, "eval_steps_per_second": 22.395, "step": 17600 }, { "epoch": 8.18, "learning_rate": 9.097575757575759e-06, "loss": 0.6303, "step": 18000 }, { "epoch": 8.41, "learning_rate": 8.794545454545456e-06, "loss": 0.5999, "step": 18500 }, { "epoch": 8.64, "learning_rate": 8.491515151515152e-06, "loss": 0.6254, "step": 19000 }, { "epoch": 8.86, "learning_rate": 8.188484848484849e-06, "loss": 0.6222, "step": 19500 }, { "epoch": 9.0, "eval_loss": 3.3879506587982178, "eval_runtime": 1.8402, "eval_samples_per_second": 135.853, "eval_steps_per_second": 22.823, "step": 19800 }, { "epoch": 9.09, "learning_rate": 7.885454545454546e-06, "loss": 0.5877, "step": 20000 }, { "epoch": 9.32, "learning_rate": 7.582424242424243e-06, "loss": 0.5085, "step": 20500 }, { "epoch": 9.55, "learning_rate": 7.279393939393939e-06, "loss": 0.5189, "step": 21000 }, { "epoch": 9.77, "learning_rate": 6.976363636363637e-06, "loss": 0.5198, "step": 21500 }, { "epoch": 10.0, "learning_rate": 6.673939393939395e-06, "loss": 0.5228, "step": 22000 }, { "epoch": 10.0, "eval_loss": 3.485287666320801, "eval_runtime": 1.8718, "eval_samples_per_second": 133.559, "eval_steps_per_second": 22.438, "step": 22000 }, { "epoch": 10.23, "learning_rate": 6.371515151515152e-06, "loss": 0.4323, "step": 22500 }, { "epoch": 10.45, "learning_rate": 6.068484848484849e-06, "loss": 0.4348, "step": 23000 }, { "epoch": 10.68, "learning_rate": 5.7654545454545465e-06, "loss": 0.4376, "step": 23500 }, { "epoch": 10.91, "learning_rate": 5.4624242424242424e-06, "loss": 0.4441, "step": 24000 }, { "epoch": 11.0, "eval_loss": 3.6241962909698486, "eval_runtime": 1.8617, "eval_samples_per_second": 134.283, "eval_steps_per_second": 22.56, "step": 24200 }, { "epoch": 11.14, "learning_rate": 5.15939393939394e-06, "loss": 0.3995, "step": 24500 }, { "epoch": 11.36, "learning_rate": 4.856363636363637e-06, "loss": 0.3728, "step": 25000 }, { "epoch": 11.59, "learning_rate": 4.5533333333333335e-06, "loss": 0.3743, "step": 25500 }, { "epoch": 11.82, "learning_rate": 4.250303030303031e-06, "loss": 0.3787, "step": 26000 }, { "epoch": 12.0, "eval_loss": 3.684976100921631, "eval_runtime": 1.8613, "eval_samples_per_second": 134.316, "eval_steps_per_second": 22.565, "step": 26400 }, { "epoch": 12.05, "learning_rate": 3.947272727272727e-06, "loss": 0.374, "step": 26500 }, { "epoch": 12.27, "learning_rate": 3.645454545454546e-06, "loss": 0.3186, "step": 27000 }, { "epoch": 12.5, "learning_rate": 3.3424242424242424e-06, "loss": 0.3265, "step": 27500 }, { "epoch": 12.73, "learning_rate": 3.03939393939394e-06, "loss": 0.3263, "step": 28000 }, { "epoch": 12.95, "learning_rate": 2.7363636363636363e-06, "loss": 0.3312, "step": 28500 }, { "epoch": 13.0, "eval_loss": 3.783233880996704, "eval_runtime": 1.8573, "eval_samples_per_second": 134.603, "eval_steps_per_second": 22.613, "step": 28600 }, { "epoch": 13.18, "learning_rate": 2.4333333333333335e-06, "loss": 0.2967, "step": 29000 }, { "epoch": 13.41, "learning_rate": 2.130909090909091e-06, "loss": 0.2904, "step": 29500 }, { "epoch": 13.64, "learning_rate": 1.827878787878788e-06, "loss": 0.293, "step": 30000 }, { "epoch": 13.86, "learning_rate": 1.5248484848484849e-06, "loss": 0.2893, "step": 30500 }, { "epoch": 14.0, "eval_loss": 3.7963521480560303, "eval_runtime": 1.8603, "eval_samples_per_second": 134.387, "eval_steps_per_second": 22.577, "step": 30800 }, { "epoch": 14.09, "learning_rate": 1.221818181818182e-06, "loss": 0.2799, "step": 31000 }, { "epoch": 14.32, "learning_rate": 9.187878787878789e-07, "loss": 0.2723, "step": 31500 }, { "epoch": 14.55, "learning_rate": 6.163636363636364e-07, "loss": 0.2673, "step": 32000 }, { "epoch": 14.77, "learning_rate": 3.1333333333333333e-07, "loss": 0.2642, "step": 32500 }, { "epoch": 15.0, "learning_rate": 1.0303030303030303e-08, "loss": 0.2671, "step": 33000 } ], "max_steps": 33000, "num_train_epochs": 15, "total_flos": 1.1231089438777344e+17, "trial_name": null, "trial_params": null }