|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 18.0,
  "global_step": 94050,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0, "learning_rate": 5e-09, "loss": 10.4865, "step": 1 },
    { "epoch": 0.1, "learning_rate": 2.5e-06, "loss": 9.4439, "step": 500 },
    { "epoch": 0.19, "learning_rate": 5e-06, "loss": 7.6179, "step": 1000 },
    { "epoch": 0.29, "learning_rate": 7.5e-06, "loss": 6.3619, "step": 1500 },
    { "epoch": 0.38, "learning_rate": 1e-05, "loss": 6.0809, "step": 2000 },
    { "epoch": 0.48, "learning_rate": 1.25e-05, "loss": 5.957, "step": 2500 },
    { "epoch": 0.57, "learning_rate": 1.5e-05, "loss": 5.8758, "step": 3000 },
    { "epoch": 0.67, "learning_rate": 1.75e-05, "loss": 5.8102, "step": 3500 },
    { "epoch": 0.77, "learning_rate": 2e-05, "loss": 5.7625, "step": 4000 },
    { "epoch": 0.86, "learning_rate": 2.25e-05, "loss": 5.7195, "step": 4500 },
    { "epoch": 0.96, "learning_rate": 2.5e-05, "loss": 5.6801, "step": 5000 },
    { "epoch": 1.05, "learning_rate": 2.7500000000000004e-05, "loss": 5.6449, "step": 5500 },
    { "epoch": 1.15, "learning_rate": 3e-05, "loss": 5.6134, "step": 6000 },
    { "epoch": 1.24, "learning_rate": 3.2500000000000004e-05, "loss": 5.591, "step": 6500 },
    { "epoch": 1.34, "learning_rate": 3.5e-05, "loss": 5.5683, "step": 7000 },
    { "epoch": 1.44, "learning_rate": 3.7500000000000003e-05, "loss": 5.5419, "step": 7500 },
    { "epoch": 1.53, "learning_rate": 4e-05, "loss": 5.5231, "step": 8000 },
    { "epoch": 1.63, "learning_rate": 4.25e-05, "loss": 5.506, "step": 8500 },
    { "epoch": 1.72, "learning_rate": 4.4995000000000005e-05, "loss": 5.4871, "step": 9000 },
    { "epoch": 1.82, "learning_rate": 4.7495e-05, "loss": 5.4763, "step": 9500 },
    { "epoch": 1.91, "learning_rate": 4.9995000000000005e-05, "loss": 5.4615, "step": 10000 },
    { "epoch": 2.01, "learning_rate": 4.998800480769231e-05, "loss": 5.4468, "step": 10500 },
    { "epoch": 2.11, "learning_rate": 4.997600961538462e-05, "loss": 5.4341, "step": 11000 },
    { "epoch": 2.2, "learning_rate": 4.996399038461539e-05, "loss": 5.4224, "step": 11500 },
    { "epoch": 2.3, "learning_rate": 4.995197115384615e-05, "loss": 5.4099, "step": 12000 },
    { "epoch": 2.39, "learning_rate": 4.993995192307693e-05, "loss": 5.3978, "step": 12500 },
    { "epoch": 2.49, "learning_rate": 4.992795673076923e-05, "loss": 5.3897, "step": 13000 },
    { "epoch": 2.58, "learning_rate": 4.991593750000001e-05, "loss": 5.3836, "step": 13500 },
    { "epoch": 2.68, "learning_rate": 4.990391826923077e-05, "loss": 5.3737, "step": 14000 },
    { "epoch": 2.78, "learning_rate": 4.989189903846154e-05, "loss": 5.3668, "step": 14500 },
    { "epoch": 2.87, "learning_rate": 4.987990384615385e-05, "loss": 5.3597, "step": 15000 },
    { "epoch": 2.97, "learning_rate": 4.986788461538462e-05, "loss": 5.3485, "step": 15500 },
    { "epoch": 3.06, "learning_rate": 4.9855889423076926e-05, "loss": 5.3413, "step": 16000 },
    { "epoch": 3.16, "learning_rate": 4.9843870192307694e-05, "loss": 5.338, "step": 16500 },
    { "epoch": 3.25, "learning_rate": 4.983185096153846e-05, "loss": 5.3304, "step": 17000 },
    { "epoch": 3.35, "learning_rate": 4.981983173076924e-05, "loss": 5.3258, "step": 17500 },
    { "epoch": 3.44, "learning_rate": 4.98078125e-05, "loss": 5.317, "step": 18000 },
    { "epoch": 3.54, "learning_rate": 4.9795793269230774e-05, "loss": 5.3134, "step": 18500 },
    { "epoch": 3.64, "learning_rate": 4.978377403846154e-05, "loss": 5.3097, "step": 19000 },
    { "epoch": 3.73, "learning_rate": 4.977175480769231e-05, "loss": 5.3019, "step": 19500 },
    { "epoch": 3.83, "learning_rate": 4.9759759615384614e-05, "loss": 5.2985, "step": 20000 },
    { "epoch": 3.92, "learning_rate": 4.974774038461539e-05, "loss": 5.2942, "step": 20500 },
    { "epoch": 4.02, "learning_rate": 4.973572115384616e-05, "loss": 5.2893, "step": 21000 },
    { "epoch": 4.11, "learning_rate": 4.9723701923076925e-05, "loss": 5.2843, "step": 21500 },
    { "epoch": 4.21, "learning_rate": 4.971170673076923e-05, "loss": 5.2784, "step": 22000 },
    { "epoch": 4.31, "learning_rate": 4.9699687500000004e-05, "loss": 5.2732, "step": 22500 },
    { "epoch": 4.4, "learning_rate": 4.968766826923077e-05, "loss": 5.2701, "step": 23000 },
    { "epoch": 4.5, "learning_rate": 4.967564903846154e-05, "loss": 5.2677, "step": 23500 },
    { "epoch": 4.59, "learning_rate": 4.9663653846153844e-05, "loss": 5.2644, "step": 24000 },
    { "epoch": 4.69, "learning_rate": 4.965163461538462e-05, "loss": 5.2562, "step": 24500 },
    { "epoch": 4.78, "learning_rate": 4.963963942307693e-05, "loss": 5.2557, "step": 25000 },
    { "epoch": 4.88, "learning_rate": 4.96276201923077e-05, "loss": 5.2529, "step": 25500 },
    { "epoch": 4.98, "learning_rate": 4.9615600961538466e-05, "loss": 5.2504, "step": 26000 },
    { "epoch": 5.07, "learning_rate": 4.9603581730769234e-05, "loss": 5.1431, "step": 26500 },
    { "epoch": 5.17, "learning_rate": 4.95915625e-05, "loss": 4.8144, "step": 27000 },
    { "epoch": 5.26, "learning_rate": 4.957954326923077e-05, "loss": 4.5379, "step": 27500 },
    { "epoch": 5.36, "learning_rate": 4.9567524038461545e-05, "loss": 4.282, "step": 28000 },
    { "epoch": 5.45, "learning_rate": 4.955550480769231e-05, "loss": 3.7642, "step": 28500 },
    { "epoch": 5.55, "learning_rate": 4.9543485576923075e-05, "loss": 3.0854, "step": 29000 },
    { "epoch": 5.65, "learning_rate": 4.9531490384615385e-05, "loss": 2.6674, "step": 29500 },
    { "epoch": 5.74, "learning_rate": 4.951947115384616e-05, "loss": 2.2254, "step": 30000 },
    { "epoch": 5.84, "learning_rate": 4.950745192307693e-05, "loss": 1.9446, "step": 30500 },
    { "epoch": 5.93, "learning_rate": 4.94954326923077e-05, "loss": 1.7693, "step": 31000 },
    { "epoch": 6.03, "learning_rate": 4.94834375e-05, "loss": 1.6527, "step": 31500 },
    { "epoch": 6.12, "learning_rate": 4.9471418269230775e-05, "loss": 1.5696, "step": 32000 },
    { "epoch": 6.22, "learning_rate": 4.9459399038461544e-05, "loss": 1.5054, "step": 32500 },
    { "epoch": 6.32, "learning_rate": 4.944737980769231e-05, "loss": 1.4447, "step": 33000 },
    { "epoch": 6.41, "learning_rate": 4.9435384615384616e-05, "loss": 1.3901, "step": 33500 },
    { "epoch": 6.51, "learning_rate": 4.942336538461539e-05, "loss": 1.3332, "step": 34000 },
    { "epoch": 6.6, "learning_rate": 4.941134615384615e-05, "loss": 1.2692, "step": 34500 },
    { "epoch": 6.7, "learning_rate": 4.939932692307693e-05, "loss": 1.217, "step": 35000 },
    { "epoch": 6.79, "learning_rate": 4.938733173076923e-05, "loss": 1.178, "step": 35500 },
    { "epoch": 6.89, "learning_rate": 4.9375312500000006e-05, "loss": 1.143, "step": 36000 },
    { "epoch": 6.99, "learning_rate": 4.936329326923077e-05, "loss": 1.1109, "step": 36500 },
    { "epoch": 7.08, "learning_rate": 4.935129807692308e-05, "loss": 1.0859, "step": 37000 },
    { "epoch": 7.18, "learning_rate": 4.9339278846153846e-05, "loss": 1.0619, "step": 37500 },
    { "epoch": 7.27, "learning_rate": 4.932725961538462e-05, "loss": 1.0387, "step": 38000 },
    { "epoch": 7.37, "learning_rate": 4.931524038461538e-05, "loss": 1.0205, "step": 38500 },
    { "epoch": 7.46, "learning_rate": 4.930322115384616e-05, "loss": 1.0017, "step": 39000 },
    { "epoch": 7.56, "learning_rate": 4.929122596153846e-05, "loss": 0.9856, "step": 39500 },
    { "epoch": 7.66, "learning_rate": 4.9279206730769236e-05, "loss": 0.9707, "step": 40000 },
    { "epoch": 7.75, "learning_rate": 4.92671875e-05, "loss": 0.9574, "step": 40500 },
    { "epoch": 7.85, "learning_rate": 4.925516826923077e-05, "loss": 0.9455, "step": 41000 },
    { "epoch": 7.94, "learning_rate": 4.924314903846154e-05, "loss": 0.9323, "step": 41500 },
    { "epoch": 8.04, "learning_rate": 4.923112980769231e-05, "loss": 0.9199, "step": 42000 },
    { "epoch": 8.13, "learning_rate": 4.921913461538461e-05, "loss": 0.9113, "step": 42500 },
    { "epoch": 8.23, "learning_rate": 4.920713942307692e-05, "loss": 0.9012, "step": 43000 },
    { "epoch": 8.33, "learning_rate": 4.919512019230769e-05, "loss": 0.8939, "step": 43500 },
    { "epoch": 8.42, "learning_rate": 4.9183100961538466e-05, "loss": 0.8851, "step": 44000 },
    { "epoch": 8.52, "learning_rate": 4.917108173076923e-05, "loss": 0.8745, "step": 44500 },
    { "epoch": 8.61, "learning_rate": 4.91590625e-05, "loss": 0.8651, "step": 45000 },
    { "epoch": 8.71, "learning_rate": 4.914704326923077e-05, "loss": 0.8578, "step": 45500 },
    { "epoch": 8.8, "learning_rate": 4.913502403846154e-05, "loss": 0.8519, "step": 46000 },
    { "epoch": 8.9, "learning_rate": 4.912300480769231e-05, "loss": 0.8457, "step": 46500 },
    { "epoch": 9.0, "learning_rate": 4.911098557692308e-05, "loss": 0.8389, "step": 47000 },
    { "epoch": 9.09, "learning_rate": 4.9098990384615386e-05, "loss": 0.8305, "step": 47500 },
    { "epoch": 9.19, "learning_rate": 4.9086971153846154e-05, "loss": 0.8233, "step": 48000 },
    { "epoch": 9.28, "learning_rate": 4.907495192307692e-05, "loss": 0.8189, "step": 48500 },
    { "epoch": 9.38, "learning_rate": 4.90629326923077e-05, "loss": 0.8129, "step": 49000 },
    { "epoch": 9.47, "learning_rate": 4.905093750000001e-05, "loss": 0.8076, "step": 49500 },
    { "epoch": 9.57, "learning_rate": 4.903894230769231e-05, "loss": 0.8019, "step": 50000 },
    { "epoch": 9.67, "learning_rate": 4.902692307692308e-05, "loss": 0.7962, "step": 50500 },
    { "epoch": 9.76, "learning_rate": 4.901490384615385e-05, "loss": 0.7904, "step": 51000 },
    { "epoch": 9.86, "learning_rate": 4.900288461538462e-05, "loss": 0.7879, "step": 51500 },
    { "epoch": 9.95, "learning_rate": 4.8990865384615384e-05, "loss": 0.7811, "step": 52000 },
    { "epoch": 10.05, "learning_rate": 4.897884615384616e-05, "loss": 0.7781, "step": 52500 },
    { "epoch": 10.14, "learning_rate": 4.896682692307693e-05, "loss": 0.7724, "step": 53000 },
    { "epoch": 10.24, "learning_rate": 4.8954807692307695e-05, "loss": 0.7682, "step": 53500 },
    { "epoch": 10.33, "learning_rate": 4.89428125e-05, "loss": 0.7637, "step": 54000 },
    { "epoch": 10.43, "learning_rate": 4.893081730769231e-05, "loss": 0.7592, "step": 54500 },
    { "epoch": 10.53, "learning_rate": 4.891879807692308e-05, "loss": 0.7541, "step": 55000 },
    { "epoch": 10.62, "learning_rate": 4.890677884615385e-05, "loss": 0.75, "step": 55500 },
    { "epoch": 10.72, "learning_rate": 4.8894759615384614e-05, "loss": 0.749, "step": 56000 },
    { "epoch": 10.81, "learning_rate": 4.888274038461539e-05, "loss": 0.7434, "step": 56500 },
    { "epoch": 10.91, "learning_rate": 4.887074519230769e-05, "loss": 0.7407, "step": 57000 },
    { "epoch": 11.0, "learning_rate": 4.885872596153847e-05, "loss": 0.7381, "step": 57500 },
    { "epoch": 11.1, "learning_rate": 4.884670673076923e-05, "loss": 0.7344, "step": 58000 },
    { "epoch": 11.2, "learning_rate": 4.8834687500000004e-05, "loss": 0.7282, "step": 58500 },
    { "epoch": 11.29, "learning_rate": 4.882269230769231e-05, "loss": 0.7272, "step": 59000 },
    { "epoch": 11.39, "learning_rate": 4.881067307692308e-05, "loss": 0.7236, "step": 59500 },
    { "epoch": 11.48, "learning_rate": 4.8798653846153845e-05, "loss": 0.7196, "step": 60000 },
    { "epoch": 11.58, "learning_rate": 4.878663461538462e-05, "loss": 0.7164, "step": 60500 },
    { "epoch": 11.67, "learning_rate": 4.877461538461539e-05, "loss": 0.7129, "step": 61000 },
    { "epoch": 11.77, "learning_rate": 4.87626201923077e-05, "loss": 0.71, "step": 61500 },
    { "epoch": 11.87, "learning_rate": 4.875060096153846e-05, "loss": 0.7088, "step": 62000 },
    { "epoch": 11.96, "learning_rate": 4.8738581730769235e-05, "loss": 0.7057, "step": 62500 },
    { "epoch": 12.06, "learning_rate": 4.87265625e-05, "loss": 0.7022, "step": 63000 },
    { "epoch": 12.15, "learning_rate": 4.8714567307692313e-05, "loss": 0.6977, "step": 63500 },
    { "epoch": 12.25, "learning_rate": 4.8702548076923075e-05, "loss": 0.6988, "step": 64000 },
    { "epoch": 12.34, "learning_rate": 4.869052884615385e-05, "loss": 0.6943, "step": 64500 },
    { "epoch": 12.44, "learning_rate": 4.867850961538462e-05, "loss": 0.6919, "step": 65000 },
    { "epoch": 12.54, "learning_rate": 4.8666490384615386e-05, "loss": 0.6888, "step": 65500 },
    { "epoch": 12.63, "learning_rate": 4.865449519230769e-05, "loss": 0.686, "step": 66000 },
    { "epoch": 12.73, "learning_rate": 4.8642475961538465e-05, "loss": 0.6843, "step": 66500 },
    { "epoch": 12.82, "learning_rate": 4.863045673076923e-05, "loss": 0.681, "step": 67000 },
    { "epoch": 12.92, "learning_rate": 4.86184375e-05, "loss": 0.68, "step": 67500 },
    { "epoch": 13.01, "learning_rate": 4.860644230769231e-05, "loss": 0.6775, "step": 68000 },
    { "epoch": 13.11, "learning_rate": 4.859442307692308e-05, "loss": 0.6745, "step": 68500 },
    { "epoch": 13.21, "learning_rate": 4.858240384615385e-05, "loss": 0.6726, "step": 69000 },
    { "epoch": 13.3, "learning_rate": 4.8570384615384616e-05, "loss": 0.6716, "step": 69500 },
    { "epoch": 13.4, "learning_rate": 4.855836538461539e-05, "loss": 0.6691, "step": 70000 },
    { "epoch": 13.49, "learning_rate": 4.8546370192307695e-05, "loss": 0.6665, "step": 70500 },
    { "epoch": 13.59, "learning_rate": 4.853435096153846e-05, "loss": 0.6625, "step": 71000 },
    { "epoch": 13.68, "learning_rate": 4.852233173076923e-05, "loss": 0.6609, "step": 71500 },
    { "epoch": 13.78, "learning_rate": 4.8510312500000006e-05, "loss": 0.66, "step": 72000 },
    { "epoch": 13.88, "learning_rate": 4.8498293269230774e-05, "loss": 0.6566, "step": 72500 },
    { "epoch": 13.97, "learning_rate": 4.848627403846154e-05, "loss": 0.6561, "step": 73000 },
    { "epoch": 14.07, "learning_rate": 4.8474278846153847e-05, "loss": 0.6536, "step": 73500 },
    { "epoch": 14.16, "learning_rate": 4.846225961538462e-05, "loss": 0.6509, "step": 74000 },
    { "epoch": 14.26, "learning_rate": 4.845024038461539e-05, "loss": 0.6509, "step": 74500 },
    { "epoch": 14.35, "learning_rate": 4.843822115384616e-05, "loss": 0.6471, "step": 75000 },
    { "epoch": 14.45, "learning_rate": 4.8426201923076926e-05, "loss": 0.6459, "step": 75500 },
    { "epoch": 14.55, "learning_rate": 4.8414206730769237e-05, "loss": 0.6452, "step": 76000 },
    { "epoch": 14.64, "learning_rate": 4.840221153846154e-05, "loss": 0.6425, "step": 76500 },
    { "epoch": 14.74, "learning_rate": 4.839019230769231e-05, "loss": 0.6406, "step": 77000 },
    { "epoch": 14.83, "learning_rate": 4.837817307692308e-05, "loss": 0.6393, "step": 77500 },
    { "epoch": 14.93, "learning_rate": 4.836615384615385e-05, "loss": 0.6362, "step": 78000 },
    { "epoch": 15.02, "learning_rate": 4.835413461538461e-05, "loss": 0.6368, "step": 78500 },
    { "epoch": 15.12, "learning_rate": 4.834211538461539e-05, "loss": 0.634, "step": 79000 },
    { "epoch": 15.22, "learning_rate": 4.8330096153846156e-05, "loss": 0.6324, "step": 79500 },
    { "epoch": 15.31, "learning_rate": 4.831810096153847e-05, "loss": 0.6287, "step": 80000 },
    { "epoch": 15.41, "learning_rate": 4.830608173076923e-05, "loss": 0.6286, "step": 80500 },
    { "epoch": 15.5, "learning_rate": 4.82940625e-05, "loss": 0.6267, "step": 81000 },
    { "epoch": 15.6, "learning_rate": 4.828204326923077e-05, "loss": 0.6257, "step": 81500 },
    { "epoch": 15.69, "learning_rate": 4.827002403846154e-05, "loss": 0.6245, "step": 82000 },
    { "epoch": 15.79, "learning_rate": 4.825800480769231e-05, "loss": 0.6216, "step": 82500 },
    { "epoch": 15.89, "learning_rate": 4.824598557692308e-05, "loss": 0.6208, "step": 83000 },
    { "epoch": 15.98, "learning_rate": 4.8233966346153844e-05, "loss": 0.621, "step": 83500 },
    { "epoch": 16.08, "learning_rate": 4.822194711538462e-05, "loss": 0.617, "step": 84000 },
    { "epoch": 16.17, "learning_rate": 4.820995192307692e-05, "loss": 0.6158, "step": 84500 },
    { "epoch": 16.27, "learning_rate": 4.819795673076923e-05, "loss": 0.617, "step": 85000 },
    { "epoch": 16.36, "learning_rate": 4.81859375e-05, "loss": 0.6149, "step": 85500 },
    { "epoch": 16.46, "learning_rate": 4.817391826923077e-05, "loss": 0.6128, "step": 86000 },
    { "epoch": 16.56, "learning_rate": 4.816189903846154e-05, "loss": 0.6119, "step": 86500 },
    { "epoch": 16.65, "learning_rate": 4.814987980769231e-05, "loss": 0.6104, "step": 87000 },
    { "epoch": 16.75, "learning_rate": 4.8137860576923074e-05, "loss": 0.6082, "step": 87500 },
    { "epoch": 16.84, "learning_rate": 4.812584134615385e-05, "loss": 0.6077, "step": 88000 },
    { "epoch": 16.94, "learning_rate": 4.811382211538462e-05, "loss": 0.6066, "step": 88500 },
    { "epoch": 17.03, "learning_rate": 4.810182692307693e-05, "loss": 0.6051, "step": 89000 },
    { "epoch": 17.13, "learning_rate": 4.808980769230769e-05, "loss": 0.6035, "step": 89500 },
    { "epoch": 17.22, "learning_rate": 4.8077788461538464e-05, "loss": 0.6032, "step": 90000 },
    { "epoch": 17.32, "learning_rate": 4.806576923076923e-05, "loss": 0.6001, "step": 90500 },
    { "epoch": 17.42, "learning_rate": 4.805375e-05, "loss": 0.598, "step": 91000 },
    { "epoch": 17.51, "learning_rate": 4.8041754807692304e-05, "loss": 0.5996, "step": 91500 },
    { "epoch": 17.61, "learning_rate": 4.802973557692308e-05, "loss": 0.5987, "step": 92000 },
    { "epoch": 17.7, "learning_rate": 4.801771634615385e-05, "loss": 0.5973, "step": 92500 },
    { "epoch": 17.8, "learning_rate": 4.800572115384616e-05, "loss": 0.5957, "step": 93000 },
    { "epoch": 17.89, "learning_rate": 4.7993701923076926e-05, "loss": 0.5938, "step": 93500 },
    { "epoch": 17.99, "learning_rate": 4.7981682692307694e-05, "loss": 0.593, "step": 94000 }
  ],
  "max_steps": 2090000,
  "num_train_epochs": 400,
  "total_flos": 2.5346531711380357e+19,
  "trial_name": null,
  "trial_params": null
}
|
|