|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.939195509822264, |
|
"global_step": 42500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9415341440598695e-05, |
|
"loss": 4.3211, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.883068288119738e-05, |
|
"loss": 3.4901, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.8246024321796074e-05, |
|
"loss": 3.1921, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.766136576239477e-05, |
|
"loss": 2.967, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.7076707202993454e-05, |
|
"loss": 2.8175, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.6492048643592146e-05, |
|
"loss": 2.7129, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.590739008419083e-05, |
|
"loss": 2.6442, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.5322731524789526e-05, |
|
"loss": 2.5971, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.473807296538822e-05, |
|
"loss": 2.4775, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.4153414405986905e-05, |
|
"loss": 2.3784, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.35687558465856e-05, |
|
"loss": 2.3465, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.298409728718429e-05, |
|
"loss": 2.3377, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.239943872778298e-05, |
|
"loss": 2.3163, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.181478016838167e-05, |
|
"loss": 2.3037, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.123012160898036e-05, |
|
"loss": 2.2963, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 4.064546304957905e-05, |
|
"loss": 2.279, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 4.006080449017774e-05, |
|
"loss": 2.2744, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.947614593077643e-05, |
|
"loss": 2.0997, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.889148737137512e-05, |
|
"loss": 2.0715, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3.8306828811973814e-05, |
|
"loss": 2.0789, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.77221702525725e-05, |
|
"loss": 2.075, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 3.713751169317119e-05, |
|
"loss": 2.0657, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.655285313376988e-05, |
|
"loss": 2.0795, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.596819457436857e-05, |
|
"loss": 2.0503, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 3.538353601496726e-05, |
|
"loss": 2.0637, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.479887745556595e-05, |
|
"loss": 1.9936, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 3.421421889616464e-05, |
|
"loss": 1.8791, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 3.362956033676333e-05, |
|
"loss": 1.8823, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 3.304490177736202e-05, |
|
"loss": 1.8947, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 3.246024321796071e-05, |
|
"loss": 1.8993, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 3.18755846585594e-05, |
|
"loss": 1.8999, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 3.129092609915809e-05, |
|
"loss": 1.8917, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 3.070626753975678e-05, |
|
"loss": 1.9039, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 3.0121608980355477e-05, |
|
"loss": 1.8958, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 2.9536950420954164e-05, |
|
"loss": 1.7626, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 2.8952291861552856e-05, |
|
"loss": 1.7527, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.8367633302151546e-05, |
|
"loss": 1.7416, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 2.7782974742750236e-05, |
|
"loss": 1.7483, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 2.7198316183348925e-05, |
|
"loss": 1.7518, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 2.6613657623947615e-05, |
|
"loss": 1.7548, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 2.6028999064546304e-05, |
|
"loss": 1.7542, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 2.5444340505144997e-05, |
|
"loss": 1.7547, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 2.4859681945743687e-05, |
|
"loss": 1.7305, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 2.4275023386342376e-05, |
|
"loss": 1.6149, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 2.3690364826941066e-05, |
|
"loss": 1.6119, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 2.310570626753976e-05, |
|
"loss": 1.6279, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 2.2521047708138448e-05, |
|
"loss": 1.6463, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 2.1936389148737138e-05, |
|
"loss": 1.6341, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 2.1351730589335827e-05, |
|
"loss": 1.6401, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 2.076707202993452e-05, |
|
"loss": 1.647, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 2.018241347053321e-05, |
|
"loss": 1.6439, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 1.95977549111319e-05, |
|
"loss": 1.5445, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 1.9013096351730592e-05, |
|
"loss": 1.5145, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 1.8428437792329282e-05, |
|
"loss": 1.5314, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 1.784377923292797e-05, |
|
"loss": 1.534, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 1.725912067352666e-05, |
|
"loss": 1.5329, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 1.6674462114125354e-05, |
|
"loss": 1.5302, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 1.6089803554724043e-05, |
|
"loss": 1.5467, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 1.5505144995322733e-05, |
|
"loss": 1.5472, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.492048643592142e-05, |
|
"loss": 1.5257, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 1.4335827876520114e-05, |
|
"loss": 1.4319, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 1.3751169317118803e-05, |
|
"loss": 1.4378, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 1.3166510757717493e-05, |
|
"loss": 1.4506, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 1.2581852198316186e-05, |
|
"loss": 1.4453, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 1.1997193638914875e-05, |
|
"loss": 1.4588, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 1.1412535079513565e-05, |
|
"loss": 1.4601, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 1.0827876520112256e-05, |
|
"loss": 1.4585, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 1.0243217960710946e-05, |
|
"loss": 1.4664, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 9.658559401309635e-06, |
|
"loss": 1.3973, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 9.073900841908325e-06, |
|
"loss": 1.3694, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 8.489242282507016e-06, |
|
"loss": 1.3876, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 7.904583723105706e-06, |
|
"loss": 1.3896, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 7.319925163704397e-06, |
|
"loss": 1.3918, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 6.735266604303088e-06, |
|
"loss": 1.382, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 6.1506080449017775e-06, |
|
"loss": 1.3942, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 5.565949485500468e-06, |
|
"loss": 1.3889, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.981290926099158e-06, |
|
"loss": 1.3951, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 4.396632366697849e-06, |
|
"loss": 1.3346, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 3.811973807296539e-06, |
|
"loss": 1.3361, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 3.2273152478952295e-06, |
|
"loss": 1.3329, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 2.64265668849392e-06, |
|
"loss": 1.3371, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 2.0579981290926103e-06, |
|
"loss": 1.3442, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 1.4733395696913004e-06, |
|
"loss": 1.3402, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 8.886810102899906e-07, |
|
"loss": 1.3439, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 3.04022450888681e-07, |
|
"loss": 1.3374, |
|
"step": 42500 |
|
} |
|
], |
|
"max_steps": 42760, |
|
"num_train_epochs": 10, |
|
"total_flos": 1687120822272000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|