{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.612756264236902, "eval_steps": 500, "global_step": 176, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 1.9999323080037623e-05, "loss": 0.8679, "step": 1 }, { "epoch": 0.18, "learning_rate": 1.9983081582712684e-05, "loss": 0.8526, "step": 5 }, { "epoch": 0.36, "learning_rate": 1.9932383577419432e-05, "loss": 0.8014, "step": 10 }, { "epoch": 0.55, "learning_rate": 1.9848077530122083e-05, "loss": 0.7437, "step": 15 }, { "epoch": 0.62, "eval_loss": 0.6867304444313049, "eval_runtime": 30.7118, "eval_samples_per_second": 6.512, "eval_steps_per_second": 0.814, "step": 17 }, { "epoch": 1.08, "learning_rate": 1.973044870579824e-05, "loss": 0.6989, "step": 20 }, { "epoch": 1.27, "learning_rate": 1.957989512315489e-05, "loss": 0.6673, "step": 25 }, { "epoch": 1.45, "learning_rate": 1.9396926207859085e-05, "loss": 0.6539, "step": 30 }, { "epoch": 1.63, "learning_rate": 1.9182161068802742e-05, "loss": 0.6372, "step": 35 }, { "epoch": 1.63, "eval_loss": 0.621515691280365, "eval_runtime": 30.0651, "eval_samples_per_second": 6.652, "eval_steps_per_second": 0.832, "step": 35 }, { "epoch": 2.17, "learning_rate": 1.8936326403234125e-05, "loss": 0.6243, "step": 40 }, { "epoch": 2.35, "learning_rate": 1.866025403784439e-05, "loss": 0.6186, "step": 45 }, { "epoch": 2.53, "learning_rate": 1.8354878114129368e-05, "loss": 0.6078, "step": 50 }, { "epoch": 2.64, "eval_loss": 0.5859256386756897, "eval_runtime": 30.0404, "eval_samples_per_second": 6.658, "eval_steps_per_second": 0.832, "step": 53 }, { "epoch": 3.07, "learning_rate": 1.802123192755044e-05, "loss": 0.5942, "step": 55 }, { "epoch": 3.25, "learning_rate": 1.766044443118978e-05, "loss": 0.5896, "step": 60 }, { "epoch": 3.44, "learning_rate": 1.7273736415730488e-05, "loss": 0.5848, "step": 65 }, { "epoch": 3.62, "learning_rate": 1.686241637868734e-05, "loss": 0.5724, "step": 70 }, { "epoch": 3.62, "eval_loss": 0.562507688999176, "eval_runtime": 30.0633, "eval_samples_per_second": 6.653, "eval_steps_per_second": 0.832, "step": 70 }, { "epoch": 4.15, "learning_rate": 1.6427876096865394e-05, "loss": 0.5703, "step": 75 }, { "epoch": 4.34, "learning_rate": 1.5971585917027864e-05, "loss": 0.5612, "step": 80 }, { "epoch": 4.52, "learning_rate": 1.5495089780708062e-05, "loss": 0.5613, "step": 85 }, { "epoch": 4.63, "eval_loss": 0.544775128364563, "eval_runtime": 30.0298, "eval_samples_per_second": 6.66, "eval_steps_per_second": 0.833, "step": 88 }, { "epoch": 5.06, "learning_rate": 1.5000000000000002e-05, "loss": 0.5509, "step": 90 }, { "epoch": 5.24, "learning_rate": 1.4487991802004625e-05, "loss": 0.5486, "step": 95 }, { "epoch": 5.42, "learning_rate": 1.396079766039157e-05, "loss": 0.5485, "step": 100 }, { "epoch": 5.6, "learning_rate": 1.342020143325669e-05, "loss": 0.5427, "step": 105 }, { "epoch": 5.64, "eval_loss": 0.5336735844612122, "eval_runtime": 30.037, "eval_samples_per_second": 6.658, "eval_steps_per_second": 0.832, "step": 106 }, { "epoch": 6.14, "learning_rate": 1.2868032327110904e-05, "loss": 0.5399, "step": 110 }, { "epoch": 6.32, "learning_rate": 1.2306158707424402e-05, "loss": 0.5376, "step": 115 }, { "epoch": 6.51, "learning_rate": 1.1736481776669307e-05, "loss": 0.5388, "step": 120 }, { "epoch": 6.62, "eval_loss": 0.5273823142051697, "eval_runtime": 30.0618, "eval_samples_per_second": 6.653, "eval_steps_per_second": 0.832, "step": 123 }, { "epoch": 7.04, "learning_rate": 1.1160929141252303e-05, "loss": 0.5321, "step": 125 }, { "epoch": 7.23, "learning_rate": 1.0581448289104759e-05, "loss": 0.5323, "step": 130 }, { "epoch": 7.41, "learning_rate": 1e-05, "loss": 0.5326, "step": 135 }, { "epoch": 7.59, "learning_rate": 9.418551710895243e-06, "loss": 0.5284, "step": 140 }, { "epoch": 7.63, "eval_loss": 0.5229138731956482, "eval_runtime": 30.0491, "eval_samples_per_second": 6.656, "eval_steps_per_second": 0.832, "step": 141 }, { "epoch": 8.13, "learning_rate": 8.839070858747697e-06, "loss": 0.529, "step": 145 }, { "epoch": 8.31, "learning_rate": 8.263518223330698e-06, "loss": 0.5277, "step": 150 }, { "epoch": 8.49, "learning_rate": 7.6938412925756e-06, "loss": 0.5285, "step": 155 }, { "epoch": 8.64, "eval_loss": 0.5188391208648682, "eval_runtime": 30.0316, "eval_samples_per_second": 6.66, "eval_steps_per_second": 0.832, "step": 159 }, { "epoch": 9.03, "learning_rate": 7.131967672889101e-06, "loss": 0.5219, "step": 160 }, { "epoch": 9.21, "learning_rate": 6.579798566743314e-06, "loss": 0.5249, "step": 165 }, { "epoch": 9.39, "learning_rate": 6.039202339608432e-06, "loss": 0.5249, "step": 170 }, { "epoch": 9.58, "learning_rate": 5.512008197995379e-06, "loss": 0.5222, "step": 175 }, { "epoch": 9.61, "eval_loss": 0.5165340304374695, "eval_runtime": 30.0644, "eval_samples_per_second": 6.652, "eval_steps_per_second": 0.832, "step": 176 }, { "epoch": 9.61, "step": 176, "total_flos": 1.59704785354752e+16, "train_loss": 0.5895595401525497, "train_runtime": 27383.9371, "train_samples_per_second": 2.562, "train_steps_per_second": 0.01 } ], "logging_steps": 5, "max_steps": 270, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.59704785354752e+16, "trial_name": null, "trial_params": null }