{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.939195509822264, "global_step": 42500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 4.9415341440598695e-05, "loss": 4.3211, "step": 500 }, { "epoch": 0.23, "learning_rate": 4.883068288119738e-05, "loss": 3.4901, "step": 1000 }, { "epoch": 0.35, "learning_rate": 4.8246024321796074e-05, "loss": 3.1921, "step": 1500 }, { "epoch": 0.47, "learning_rate": 4.766136576239477e-05, "loss": 2.967, "step": 2000 }, { "epoch": 0.58, "learning_rate": 4.7076707202993454e-05, "loss": 2.8175, "step": 2500 }, { "epoch": 0.7, "learning_rate": 4.6492048643592146e-05, "loss": 2.7129, "step": 3000 }, { "epoch": 0.82, "learning_rate": 4.590739008419083e-05, "loss": 2.6442, "step": 3500 }, { "epoch": 0.94, "learning_rate": 4.5322731524789526e-05, "loss": 2.5971, "step": 4000 }, { "epoch": 1.05, "learning_rate": 4.473807296538822e-05, "loss": 2.4775, "step": 4500 }, { "epoch": 1.17, "learning_rate": 4.4153414405986905e-05, "loss": 2.3784, "step": 5000 }, { "epoch": 1.29, "learning_rate": 4.35687558465856e-05, "loss": 2.3465, "step": 5500 }, { "epoch": 1.4, "learning_rate": 4.298409728718429e-05, "loss": 2.3377, "step": 6000 }, { "epoch": 1.52, "learning_rate": 4.239943872778298e-05, "loss": 2.3163, "step": 6500 }, { "epoch": 1.64, "learning_rate": 4.181478016838167e-05, "loss": 2.3037, "step": 7000 }, { "epoch": 1.75, "learning_rate": 4.123012160898036e-05, "loss": 2.2963, "step": 7500 }, { "epoch": 1.87, "learning_rate": 4.064546304957905e-05, "loss": 2.279, "step": 8000 }, { "epoch": 1.99, "learning_rate": 4.006080449017774e-05, "loss": 2.2744, "step": 8500 }, { "epoch": 2.1, "learning_rate": 3.947614593077643e-05, "loss": 2.0997, "step": 9000 }, { "epoch": 2.22, "learning_rate": 3.889148737137512e-05, "loss": 2.0715, "step": 9500 }, { "epoch": 2.34, "learning_rate": 3.8306828811973814e-05, "loss": 2.0789, "step": 10000 }, { "epoch": 2.46, "learning_rate": 3.77221702525725e-05, "loss": 2.075, "step": 10500 }, { "epoch": 2.57, "learning_rate": 3.713751169317119e-05, "loss": 2.0657, "step": 11000 }, { "epoch": 2.69, "learning_rate": 3.655285313376988e-05, "loss": 2.0795, "step": 11500 }, { "epoch": 2.81, "learning_rate": 3.596819457436857e-05, "loss": 2.0503, "step": 12000 }, { "epoch": 2.92, "learning_rate": 3.538353601496726e-05, "loss": 2.0637, "step": 12500 }, { "epoch": 3.04, "learning_rate": 3.479887745556595e-05, "loss": 1.9936, "step": 13000 }, { "epoch": 3.16, "learning_rate": 3.421421889616464e-05, "loss": 1.8791, "step": 13500 }, { "epoch": 3.27, "learning_rate": 3.362956033676333e-05, "loss": 1.8823, "step": 14000 }, { "epoch": 3.39, "learning_rate": 3.304490177736202e-05, "loss": 1.8947, "step": 14500 }, { "epoch": 3.51, "learning_rate": 3.246024321796071e-05, "loss": 1.8993, "step": 15000 }, { "epoch": 3.62, "learning_rate": 3.18755846585594e-05, "loss": 1.8999, "step": 15500 }, { "epoch": 3.74, "learning_rate": 3.129092609915809e-05, "loss": 1.8917, "step": 16000 }, { "epoch": 3.86, "learning_rate": 3.070626753975678e-05, "loss": 1.9039, "step": 16500 }, { "epoch": 3.98, "learning_rate": 3.0121608980355477e-05, "loss": 1.8958, "step": 17000 }, { "epoch": 4.09, "learning_rate": 2.9536950420954164e-05, "loss": 1.7626, "step": 17500 }, { "epoch": 4.21, "learning_rate": 2.8952291861552856e-05, "loss": 1.7527, "step": 18000 }, { "epoch": 4.33, "learning_rate": 2.8367633302151546e-05, "loss": 1.7416, "step": 18500 }, { "epoch": 4.44, "learning_rate": 2.7782974742750236e-05, "loss": 1.7483, "step": 19000 }, { "epoch": 4.56, "learning_rate": 2.7198316183348925e-05, "loss": 1.7518, "step": 19500 }, { "epoch": 4.68, "learning_rate": 2.6613657623947615e-05, "loss": 1.7548, "step": 20000 }, { "epoch": 4.79, "learning_rate": 2.6028999064546304e-05, "loss": 1.7542, "step": 20500 }, { "epoch": 4.91, "learning_rate": 2.5444340505144997e-05, "loss": 1.7547, "step": 21000 }, { "epoch": 5.03, "learning_rate": 2.4859681945743687e-05, "loss": 1.7305, "step": 21500 }, { "epoch": 5.14, "learning_rate": 2.4275023386342376e-05, "loss": 1.6149, "step": 22000 }, { "epoch": 5.26, "learning_rate": 2.3690364826941066e-05, "loss": 1.6119, "step": 22500 }, { "epoch": 5.38, "learning_rate": 2.310570626753976e-05, "loss": 1.6279, "step": 23000 }, { "epoch": 5.5, "learning_rate": 2.2521047708138448e-05, "loss": 1.6463, "step": 23500 }, { "epoch": 5.61, "learning_rate": 2.1936389148737138e-05, "loss": 1.6341, "step": 24000 }, { "epoch": 5.73, "learning_rate": 2.1351730589335827e-05, "loss": 1.6401, "step": 24500 }, { "epoch": 5.85, "learning_rate": 2.076707202993452e-05, "loss": 1.647, "step": 25000 }, { "epoch": 5.96, "learning_rate": 2.018241347053321e-05, "loss": 1.6439, "step": 25500 }, { "epoch": 6.08, "learning_rate": 1.95977549111319e-05, "loss": 1.5445, "step": 26000 }, { "epoch": 6.2, "learning_rate": 1.9013096351730592e-05, "loss": 1.5145, "step": 26500 }, { "epoch": 6.31, "learning_rate": 1.8428437792329282e-05, "loss": 1.5314, "step": 27000 }, { "epoch": 6.43, "learning_rate": 1.784377923292797e-05, "loss": 1.534, "step": 27500 }, { "epoch": 6.55, "learning_rate": 1.725912067352666e-05, "loss": 1.5329, "step": 28000 }, { "epoch": 6.67, "learning_rate": 1.6674462114125354e-05, "loss": 1.5302, "step": 28500 }, { "epoch": 6.78, "learning_rate": 1.6089803554724043e-05, "loss": 1.5467, "step": 29000 }, { "epoch": 6.9, "learning_rate": 1.5505144995322733e-05, "loss": 1.5472, "step": 29500 }, { "epoch": 7.02, "learning_rate": 1.492048643592142e-05, "loss": 1.5257, "step": 30000 }, { "epoch": 7.13, "learning_rate": 1.4335827876520114e-05, "loss": 1.4319, "step": 30500 }, { "epoch": 7.25, "learning_rate": 1.3751169317118803e-05, "loss": 1.4378, "step": 31000 }, { "epoch": 7.37, "learning_rate": 1.3166510757717493e-05, "loss": 1.4506, "step": 31500 }, { "epoch": 7.48, "learning_rate": 1.2581852198316186e-05, "loss": 1.4453, "step": 32000 }, { "epoch": 7.6, "learning_rate": 1.1997193638914875e-05, "loss": 1.4588, "step": 32500 }, { "epoch": 7.72, "learning_rate": 1.1412535079513565e-05, "loss": 1.4601, "step": 33000 }, { "epoch": 7.83, "learning_rate": 1.0827876520112256e-05, "loss": 1.4585, "step": 33500 }, { "epoch": 7.95, "learning_rate": 1.0243217960710946e-05, "loss": 1.4664, "step": 34000 }, { "epoch": 8.07, "learning_rate": 9.658559401309635e-06, "loss": 1.3973, "step": 34500 }, { "epoch": 8.19, "learning_rate": 9.073900841908325e-06, "loss": 1.3694, "step": 35000 }, { "epoch": 8.3, "learning_rate": 8.489242282507016e-06, "loss": 1.3876, "step": 35500 }, { "epoch": 8.42, "learning_rate": 7.904583723105706e-06, "loss": 1.3896, "step": 36000 }, { "epoch": 8.54, "learning_rate": 7.319925163704397e-06, "loss": 1.3918, "step": 36500 }, { "epoch": 8.65, "learning_rate": 6.735266604303088e-06, "loss": 1.382, "step": 37000 }, { "epoch": 8.77, "learning_rate": 6.1506080449017775e-06, "loss": 1.3942, "step": 37500 }, { "epoch": 8.89, "learning_rate": 5.565949485500468e-06, "loss": 1.3889, "step": 38000 }, { "epoch": 9.0, "learning_rate": 4.981290926099158e-06, "loss": 1.3951, "step": 38500 }, { "epoch": 9.12, "learning_rate": 4.396632366697849e-06, "loss": 1.3346, "step": 39000 }, { "epoch": 9.24, "learning_rate": 3.811973807296539e-06, "loss": 1.3361, "step": 39500 }, { "epoch": 9.35, "learning_rate": 3.2273152478952295e-06, "loss": 1.3329, "step": 40000 }, { "epoch": 9.47, "learning_rate": 2.64265668849392e-06, "loss": 1.3371, "step": 40500 }, { "epoch": 9.59, "learning_rate": 2.0579981290926103e-06, "loss": 1.3442, "step": 41000 }, { "epoch": 9.71, "learning_rate": 1.4733395696913004e-06, "loss": 1.3402, "step": 41500 }, { "epoch": 9.82, "learning_rate": 8.886810102899906e-07, "loss": 1.3439, "step": 42000 }, { "epoch": 9.94, "learning_rate": 3.04022450888681e-07, "loss": 1.3374, "step": 42500 } ], "max_steps": 42760, "num_train_epochs": 10, "total_flos": 1687120822272000.0, "trial_name": null, "trial_params": null }