{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 47275, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 4.9471179270227394e-05, "loss": 2.856, "step": 500 }, { "epoch": 0.11, "learning_rate": 4.8942358540454786e-05, "loss": 2.5599, "step": 1000 }, { "epoch": 0.16, "learning_rate": 4.841353781068218e-05, "loss": 2.4166, "step": 1500 }, { "epoch": 0.21, "learning_rate": 4.7884717080909575e-05, "loss": 2.3151, "step": 2000 }, { "epoch": 0.26, "learning_rate": 4.735589635113697e-05, "loss": 2.2732, "step": 2500 }, { "epoch": 0.32, "learning_rate": 4.682707562136436e-05, "loss": 2.2248, "step": 3000 }, { "epoch": 0.37, "learning_rate": 4.629825489159175e-05, "loss": 2.1665, "step": 3500 }, { "epoch": 0.42, "learning_rate": 4.576943416181915e-05, "loss": 2.1445, "step": 4000 }, { "epoch": 0.48, "learning_rate": 4.524061343204653e-05, "loss": 2.1219, "step": 4500 }, { "epoch": 0.53, "learning_rate": 4.471179270227393e-05, "loss": 2.0906, "step": 5000 }, { "epoch": 0.58, "learning_rate": 4.418297197250132e-05, "loss": 2.0674, "step": 5500 }, { "epoch": 0.63, "learning_rate": 4.365415124272872e-05, "loss": 2.0508, "step": 6000 }, { "epoch": 0.69, "learning_rate": 4.3125330512956106e-05, "loss": 2.0176, "step": 6500 }, { "epoch": 0.74, "learning_rate": 4.2596509783183505e-05, "loss": 1.9954, "step": 7000 }, { "epoch": 0.79, "learning_rate": 4.2067689053410896e-05, "loss": 1.9745, "step": 7500 }, { "epoch": 0.85, "learning_rate": 4.153886832363829e-05, "loss": 1.9776, "step": 8000 }, { "epoch": 0.9, "learning_rate": 4.1011105235325226e-05, "loss": 1.9579, "step": 8500 }, { "epoch": 0.95, "learning_rate": 4.048228450555262e-05, "loss": 1.9384, "step": 9000 }, { "epoch": 1.0, "learning_rate": 3.9953463775780016e-05, "loss": 1.914, "step": 9500 }, { "epoch": 1.06, "learning_rate": 3.94246430460074e-05, "loss": 1.7926, "step": 10000 }, { "epoch": 1.11, "learning_rate": 3.8896879957694345e-05, "loss": 1.7862, "step": 10500 }, { "epoch": 1.16, "learning_rate": 3.8369116869381283e-05, "loss": 1.7798, "step": 11000 }, { "epoch": 1.22, "learning_rate": 3.7841353781068215e-05, "loss": 1.7871, "step": 11500 }, { "epoch": 1.27, "learning_rate": 3.731253305129561e-05, "loss": 1.7872, "step": 12000 }, { "epoch": 1.32, "learning_rate": 3.6783712321523005e-05, "loss": 1.7669, "step": 12500 }, { "epoch": 1.37, "learning_rate": 3.62548915917504e-05, "loss": 1.7631, "step": 13000 }, { "epoch": 1.43, "learning_rate": 3.572607086197779e-05, "loss": 1.7672, "step": 13500 }, { "epoch": 1.48, "learning_rate": 3.5197250132205186e-05, "loss": 1.7741, "step": 14000 }, { "epoch": 1.53, "learning_rate": 3.466842940243258e-05, "loss": 1.7571, "step": 14500 }, { "epoch": 1.59, "learning_rate": 3.413960867265997e-05, "loss": 1.7444, "step": 15000 }, { "epoch": 1.64, "learning_rate": 3.361078794288736e-05, "loss": 1.742, "step": 15500 }, { "epoch": 1.69, "learning_rate": 3.30830248545743e-05, "loss": 1.734, "step": 16000 }, { "epoch": 1.75, "learning_rate": 3.25542041248017e-05, "loss": 1.7277, "step": 16500 }, { "epoch": 1.8, "learning_rate": 3.202538339502908e-05, "loss": 1.7379, "step": 17000 }, { "epoch": 1.85, "learning_rate": 3.149656266525648e-05, "loss": 1.7307, "step": 17500 }, { "epoch": 1.9, "learning_rate": 3.096879957694342e-05, "loss": 1.7145, "step": 18000 }, { "epoch": 1.96, "learning_rate": 3.0439978847170807e-05, "loss": 1.7195, "step": 18500 }, { "epoch": 2.01, "learning_rate": 2.99111581173982e-05, "loss": 1.6885, "step": 19000 }, { "epoch": 2.06, "learning_rate": 2.9382337387625597e-05, "loss": 1.5763, "step": 19500 }, { "epoch": 2.12, "learning_rate": 2.885351665785299e-05, "loss": 1.5877, "step": 20000 }, { "epoch": 2.17, "learning_rate": 2.832469592808038e-05, "loss": 1.5775, "step": 20500 }, { "epoch": 2.22, "learning_rate": 2.7795875198307775e-05, "loss": 1.584, "step": 21000 }, { "epoch": 2.27, "learning_rate": 2.726705446853517e-05, "loss": 1.5868, "step": 21500 }, { "epoch": 2.33, "learning_rate": 2.6738233738762558e-05, "loss": 1.583, "step": 22000 }, { "epoch": 2.38, "learning_rate": 2.6209413008989953e-05, "loss": 1.5835, "step": 22500 }, { "epoch": 2.43, "learning_rate": 2.568164992067689e-05, "loss": 1.5769, "step": 23000 }, { "epoch": 2.49, "learning_rate": 2.5152829190904286e-05, "loss": 1.565, "step": 23500 }, { "epoch": 2.54, "learning_rate": 2.4624008461131677e-05, "loss": 1.5741, "step": 24000 }, { "epoch": 2.59, "learning_rate": 2.409518773135907e-05, "loss": 1.5794, "step": 24500 }, { "epoch": 2.64, "learning_rate": 2.3566367001586464e-05, "loss": 1.548, "step": 25000 }, { "epoch": 2.7, "learning_rate": 2.3037546271813855e-05, "loss": 1.5791, "step": 25500 }, { "epoch": 2.75, "learning_rate": 2.2509783183500794e-05, "loss": 1.5628, "step": 26000 }, { "epoch": 2.8, "learning_rate": 2.198096245372819e-05, "loss": 1.5574, "step": 26500 }, { "epoch": 2.86, "learning_rate": 2.145214172395558e-05, "loss": 1.5635, "step": 27000 }, { "epoch": 2.91, "learning_rate": 2.0923320994182975e-05, "loss": 1.5682, "step": 27500 }, { "epoch": 2.96, "learning_rate": 2.0394500264410367e-05, "loss": 1.5423, "step": 28000 }, { "epoch": 3.01, "learning_rate": 1.986567953463776e-05, "loss": 1.5218, "step": 28500 }, { "epoch": 3.07, "learning_rate": 1.9336858804865153e-05, "loss": 1.4528, "step": 29000 }, { "epoch": 3.12, "learning_rate": 1.8808038075092545e-05, "loss": 1.4557, "step": 29500 }, { "epoch": 3.17, "learning_rate": 1.8280274986779483e-05, "loss": 1.4418, "step": 30000 }, { "epoch": 3.23, "learning_rate": 1.775251189846642e-05, "loss": 1.4528, "step": 30500 }, { "epoch": 3.28, "learning_rate": 1.7223691168693816e-05, "loss": 1.4406, "step": 31000 }, { "epoch": 3.33, "learning_rate": 1.6694870438921207e-05, "loss": 1.4369, "step": 31500 }, { "epoch": 3.38, "learning_rate": 1.6166049709148602e-05, "loss": 1.4498, "step": 32000 }, { "epoch": 3.44, "learning_rate": 1.5637228979375994e-05, "loss": 1.4466, "step": 32500 }, { "epoch": 3.49, "learning_rate": 1.5109465891062929e-05, "loss": 1.4383, "step": 33000 }, { "epoch": 3.54, "learning_rate": 1.4580645161290324e-05, "loss": 1.4497, "step": 33500 }, { "epoch": 3.6, "learning_rate": 1.4051824431517715e-05, "loss": 1.4426, "step": 34000 }, { "epoch": 3.65, "learning_rate": 1.352300370174511e-05, "loss": 1.4311, "step": 34500 }, { "epoch": 3.7, "learning_rate": 1.2994182971972502e-05, "loss": 1.4384, "step": 35000 }, { "epoch": 3.75, "learning_rate": 1.2465362242199895e-05, "loss": 1.4399, "step": 35500 }, { "epoch": 3.81, "learning_rate": 1.1936541512427288e-05, "loss": 1.4395, "step": 36000 }, { "epoch": 3.86, "learning_rate": 1.1407720782654681e-05, "loss": 1.428, "step": 36500 }, { "epoch": 3.91, "learning_rate": 1.0878900052882075e-05, "loss": 1.4439, "step": 37000 }, { "epoch": 3.97, "learning_rate": 1.0351136964569011e-05, "loss": 1.4361, "step": 37500 }, { "epoch": 4.02, "learning_rate": 9.822316234796404e-06, "loss": 1.4036, "step": 38000 }, { "epoch": 4.07, "learning_rate": 9.293495505023798e-06, "loss": 1.3662, "step": 38500 }, { "epoch": 4.12, "learning_rate": 8.76467477525119e-06, "loss": 1.3538, "step": 39000 }, { "epoch": 4.18, "learning_rate": 8.236911686938129e-06, "loss": 1.3632, "step": 39500 }, { "epoch": 4.23, "learning_rate": 7.708090957165522e-06, "loss": 1.3554, "step": 40000 }, { "epoch": 4.28, "learning_rate": 7.1792702273929146e-06, "loss": 1.3577, "step": 40500 }, { "epoch": 4.34, "learning_rate": 6.650449497620308e-06, "loss": 1.3537, "step": 41000 }, { "epoch": 4.39, "learning_rate": 6.1216287678477e-06, "loss": 1.3581, "step": 41500 }, { "epoch": 4.44, "learning_rate": 5.5928080380750935e-06, "loss": 1.3637, "step": 42000 }, { "epoch": 4.49, "learning_rate": 5.063987308302486e-06, "loss": 1.3432, "step": 42500 }, { "epoch": 4.55, "learning_rate": 4.535166578529878e-06, "loss": 1.3463, "step": 43000 }, { "epoch": 4.6, "learning_rate": 4.007403490216817e-06, "loss": 1.3481, "step": 43500 }, { "epoch": 4.65, "learning_rate": 3.4785827604442092e-06, "loss": 1.353, "step": 44000 }, { "epoch": 4.71, "learning_rate": 2.9508196721311478e-06, "loss": 1.3505, "step": 44500 }, { "epoch": 4.76, "learning_rate": 2.4219989423585406e-06, "loss": 1.3404, "step": 45000 }, { "epoch": 4.81, "learning_rate": 1.8931782125859334e-06, "loss": 1.3574, "step": 45500 }, { "epoch": 4.87, "learning_rate": 1.3643574828133265e-06, "loss": 1.3532, "step": 46000 }, { "epoch": 4.92, "learning_rate": 8.355367530407193e-07, "loss": 1.3357, "step": 46500 }, { "epoch": 4.97, "learning_rate": 3.077736647276573e-07, "loss": 1.3374, "step": 47000 }, { "epoch": 5.0, "step": 47275, "total_flos": 7.799978327959142e+17, "train_loss": 1.65651271519492, "train_runtime": 12815.2129, "train_samples_per_second": 236.094, "train_steps_per_second": 3.689 } ], "logging_steps": 500, "max_steps": 47275, "num_train_epochs": 5, "save_steps": 10000000, "total_flos": 7.799978327959142e+17, "trial_name": null, "trial_params": null }