{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.594212016357345,
  "global_step": 30500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16,
      "learning_rate": 2.764076753696131e-05,
      "loss": 0.6436,
      "step": 500
    },
    {
      "epoch": 0.31,
      "learning_rate": 2.5281535073922616e-05,
      "loss": 0.5523,
      "step": 1000
    },
    {
      "epoch": 0.47,
      "learning_rate": 2.2922302610883924e-05,
      "loss": 0.4916,
      "step": 1500
    },
    {
      "epoch": 0.63,
      "learning_rate": 2.0563070147845238e-05,
      "loss": 0.4873,
      "step": 2000
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.8203837684806546e-05,
      "loss": 0.4573,
      "step": 2500
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.5844605221767853e-05,
      "loss": 0.442,
      "step": 3000
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.3485372758729161e-05,
      "loss": 0.3298,
      "step": 3500
    },
    {
      "epoch": 1.26,
      "learning_rate": 1.1126140295690468e-05,
      "loss": 0.2909,
      "step": 4000
    },
    {
      "epoch": 1.42,
      "learning_rate": 8.766907832651778e-06,
      "loss": 0.2898,
      "step": 4500
    },
    {
      "epoch": 1.57,
      "learning_rate": 6.407675369613086e-06,
      "loss": 0.2688,
      "step": 5000
    },
    {
      "epoch": 1.73,
      "learning_rate": 4.048442906574395e-06,
      "loss": 0.2815,
      "step": 5500
    },
    {
      "epoch": 1.89,
      "learning_rate": 1.6892104435357032e-06,
      "loss": 0.2442,
      "step": 6000
    },
    {
      "epoch": 2.04,
      "learning_rate": 2.6932997798049702e-05,
      "loss": 0.2957,
      "step": 6500
    },
    {
      "epoch": 2.2,
      "learning_rate": 2.669707455174583e-05,
      "loss": 0.2735,
      "step": 7000
    },
    {
      "epoch": 2.36,
      "learning_rate": 2.6461151305441964e-05,
      "loss": 0.307,
      "step": 7500
    },
    {
      "epoch": 2.52,
      "learning_rate": 2.6225228059138093e-05,
      "loss": 0.2875,
      "step": 8000
    },
    {
      "epoch": 2.67,
      "learning_rate": 2.5989304812834225e-05,
      "loss": 0.3232,
      "step": 8500
    },
    {
      "epoch": 2.83,
      "learning_rate": 2.5753381566530358e-05,
      "loss": 0.2979,
      "step": 9000
    },
    {
      "epoch": 2.99,
      "learning_rate": 2.5517458320226487e-05,
      "loss": 0.312,
      "step": 9500
    },
    {
      "epoch": 3.15,
      "learning_rate": 2.5281535073922616e-05,
      "loss": 0.2297,
      "step": 10000
    },
    {
      "epoch": 3.3,
      "learning_rate": 2.504561182761875e-05,
      "loss": 0.2251,
      "step": 10500
    },
    {
      "epoch": 3.46,
      "learning_rate": 2.480968858131488e-05,
      "loss": 0.2427,
      "step": 11000
    },
    {
      "epoch": 3.62,
      "learning_rate": 2.457376533501101e-05,
      "loss": 0.2549,
      "step": 11500
    },
    {
      "epoch": 3.77,
      "learning_rate": 2.4337842088707142e-05,
      "loss": 0.2566,
      "step": 12000
    },
    {
      "epoch": 3.93,
      "learning_rate": 2.410191884240327e-05,
      "loss": 0.2538,
      "step": 12500
    },
    {
      "epoch": 4.09,
      "learning_rate": 2.3865995596099404e-05,
      "loss": 0.1936,
      "step": 13000
    },
    {
      "epoch": 4.25,
      "learning_rate": 2.3630072349795533e-05,
      "loss": 0.1742,
      "step": 13500
    },
    {
      "epoch": 4.4,
      "learning_rate": 2.3394149103491665e-05,
      "loss": 0.1871,
      "step": 14000
    },
    {
      "epoch": 4.56,
      "learning_rate": 2.3158225857187798e-05,
      "loss": 0.1868,
      "step": 14500
    },
    {
      "epoch": 4.72,
      "learning_rate": 2.2922302610883924e-05,
      "loss": 0.1962,
      "step": 15000
    },
    {
      "epoch": 4.88,
      "learning_rate": 2.2686379364580056e-05,
      "loss": 0.2056,
      "step": 15500
    },
    {
      "epoch": 5.03,
      "learning_rate": 2.245045611827619e-05,
      "loss": 0.1918,
      "step": 16000
    },
    {
      "epoch": 5.19,
      "learning_rate": 2.221453287197232e-05,
      "loss": 0.145,
      "step": 16500
    },
    {
      "epoch": 5.35,
      "learning_rate": 2.197860962566845e-05,
      "loss": 0.14,
      "step": 17000
    },
    {
      "epoch": 5.5,
      "learning_rate": 2.174268637936458e-05,
      "loss": 0.1442,
      "step": 17500
    },
    {
      "epoch": 5.66,
      "learning_rate": 2.150676313306071e-05,
      "loss": 0.1617,
      "step": 18000
    },
    {
      "epoch": 5.82,
      "learning_rate": 2.127083988675684e-05,
      "loss": 0.1525,
      "step": 18500
    },
    {
      "epoch": 5.98,
      "learning_rate": 2.1034916640452973e-05,
      "loss": 0.159,
      "step": 19000
    },
    {
      "epoch": 6.13,
      "learning_rate": 2.0798993394149106e-05,
      "loss": 0.1022,
      "step": 19500
    },
    {
      "epoch": 6.29,
      "learning_rate": 2.0563070147845238e-05,
      "loss": 0.1082,
      "step": 20000
    },
    {
      "epoch": 6.45,
      "learning_rate": 2.0327146901541364e-05,
      "loss": 0.1143,
      "step": 20500
    },
    {
      "epoch": 6.61,
      "learning_rate": 2.0091223655237496e-05,
      "loss": 0.1268,
      "step": 21000
    },
    {
      "epoch": 6.76,
      "learning_rate": 1.985530040893363e-05,
      "loss": 0.1178,
      "step": 21500
    },
    {
      "epoch": 6.92,
      "learning_rate": 1.9619377162629758e-05,
      "loss": 0.1102,
      "step": 22000
    },
    {
      "epoch": 7.08,
      "learning_rate": 1.938345391632589e-05,
      "loss": 0.0948,
      "step": 22500
    },
    {
      "epoch": 7.23,
      "learning_rate": 1.914753067002202e-05,
      "loss": 0.0709,
      "step": 23000
    },
    {
      "epoch": 7.39,
      "learning_rate": 1.891160742371815e-05,
      "loss": 0.0904,
      "step": 23500
    },
    {
      "epoch": 7.55,
      "learning_rate": 1.867568417741428e-05,
      "loss": 0.114,
      "step": 24000
    },
    {
      "epoch": 7.71,
      "learning_rate": 1.8439760931110413e-05,
      "loss": 0.1083,
      "step": 24500
    },
    {
      "epoch": 7.86,
      "learning_rate": 1.8203837684806546e-05,
      "loss": 0.0931,
      "step": 25000
    },
    {
      "epoch": 8.02,
      "learning_rate": 1.796791443850267e-05,
      "loss": 0.0845,
      "step": 25500
    },
    {
      "epoch": 8.18,
      "learning_rate": 1.7731991192198804e-05,
      "loss": 0.0557,
      "step": 26000
    },
    {
      "epoch": 8.34,
      "learning_rate": 1.7496067945894936e-05,
      "loss": 0.0824,
      "step": 26500
    },
    {
      "epoch": 8.49,
      "learning_rate": 1.726014469959107e-05,
      "loss": 0.064,
      "step": 27000
    },
    {
      "epoch": 8.65,
      "learning_rate": 1.7024221453287198e-05,
      "loss": 0.076,
      "step": 27500
    },
    {
      "epoch": 8.81,
      "learning_rate": 1.678829820698333e-05,
      "loss": 0.0699,
      "step": 28000
    },
    {
      "epoch": 8.97,
      "learning_rate": 1.655237496067946e-05,
      "loss": 0.0771,
      "step": 28500
    },
    {
      "epoch": 9.12,
      "learning_rate": 1.631645171437559e-05,
      "loss": 0.0455,
      "step": 29000
    },
    {
      "epoch": 9.28,
      "learning_rate": 1.608052846807172e-05,
      "loss": 0.0437,
      "step": 29500
    },
    {
      "epoch": 9.44,
      "learning_rate": 1.5844605221767853e-05,
      "loss": 0.0667,
      "step": 30000
    },
    {
      "epoch": 9.59,
      "learning_rate": 1.5608681975463986e-05,
      "loss": 0.0472,
      "step": 30500
    }
  ],
  "max_steps": 63580,
  "num_train_epochs": 20,
  "total_flos": 1.073395869196032e+16,
  "trial_name": null,
  "trial_params": null
}