{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.122365523749608,
"global_step": 29000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16,
"learning_rate": 2.764076753696131e-05,
"loss": 0.6436,
"step": 500
},
{
"epoch": 0.31,
"learning_rate": 2.5281535073922616e-05,
"loss": 0.5523,
"step": 1000
},
{
"epoch": 0.47,
"learning_rate": 2.2922302610883924e-05,
"loss": 0.4916,
"step": 1500
},
{
"epoch": 0.63,
"learning_rate": 2.0563070147845238e-05,
"loss": 0.4873,
"step": 2000
},
{
"epoch": 0.79,
"learning_rate": 1.8203837684806546e-05,
"loss": 0.4573,
"step": 2500
},
{
"epoch": 0.94,
"learning_rate": 1.5844605221767853e-05,
"loss": 0.442,
"step": 3000
},
{
"epoch": 1.1,
"learning_rate": 1.3485372758729161e-05,
"loss": 0.3298,
"step": 3500
},
{
"epoch": 1.26,
"learning_rate": 1.1126140295690468e-05,
"loss": 0.2909,
"step": 4000
},
{
"epoch": 1.42,
"learning_rate": 8.766907832651778e-06,
"loss": 0.2898,
"step": 4500
},
{
"epoch": 1.57,
"learning_rate": 6.407675369613086e-06,
"loss": 0.2688,
"step": 5000
},
{
"epoch": 1.73,
"learning_rate": 4.048442906574395e-06,
"loss": 0.2815,
"step": 5500
},
{
"epoch": 1.89,
"learning_rate": 1.6892104435357032e-06,
"loss": 0.2442,
"step": 6000
},
{
"epoch": 2.04,
"learning_rate": 2.6932997798049702e-05,
"loss": 0.2957,
"step": 6500
},
{
"epoch": 2.2,
"learning_rate": 2.669707455174583e-05,
"loss": 0.2735,
"step": 7000
},
{
"epoch": 2.36,
"learning_rate": 2.6461151305441964e-05,
"loss": 0.307,
"step": 7500
},
{
"epoch": 2.52,
"learning_rate": 2.6225228059138093e-05,
"loss": 0.2875,
"step": 8000
},
{
"epoch": 2.67,
"learning_rate": 2.5989304812834225e-05,
"loss": 0.3232,
"step": 8500
},
{
"epoch": 2.83,
"learning_rate": 2.5753381566530358e-05,
"loss": 0.2979,
"step": 9000
},
{
"epoch": 2.99,
"learning_rate": 2.5517458320226487e-05,
"loss": 0.312,
"step": 9500
},
{
"epoch": 3.15,
"learning_rate": 2.5281535073922616e-05,
"loss": 0.2297,
"step": 10000
},
{
"epoch": 3.3,
"learning_rate": 2.504561182761875e-05,
"loss": 0.2251,
"step": 10500
},
{
"epoch": 3.46,
"learning_rate": 2.480968858131488e-05,
"loss": 0.2427,
"step": 11000
},
{
"epoch": 3.62,
"learning_rate": 2.457376533501101e-05,
"loss": 0.2549,
"step": 11500
},
{
"epoch": 3.77,
"learning_rate": 2.4337842088707142e-05,
"loss": 0.2566,
"step": 12000
},
{
"epoch": 3.93,
"learning_rate": 2.410191884240327e-05,
"loss": 0.2538,
"step": 12500
},
{
"epoch": 4.09,
"learning_rate": 2.3865995596099404e-05,
"loss": 0.1936,
"step": 13000
},
{
"epoch": 4.25,
"learning_rate": 2.3630072349795533e-05,
"loss": 0.1742,
"step": 13500
},
{
"epoch": 4.4,
"learning_rate": 2.3394149103491665e-05,
"loss": 0.1871,
"step": 14000
},
{
"epoch": 4.56,
"learning_rate": 2.3158225857187798e-05,
"loss": 0.1868,
"step": 14500
},
{
"epoch": 4.72,
"learning_rate": 2.2922302610883924e-05,
"loss": 0.1962,
"step": 15000
},
{
"epoch": 4.88,
"learning_rate": 2.2686379364580056e-05,
"loss": 0.2056,
"step": 15500
},
{
"epoch": 5.03,
"learning_rate": 2.245045611827619e-05,
"loss": 0.1918,
"step": 16000
},
{
"epoch": 5.19,
"learning_rate": 2.221453287197232e-05,
"loss": 0.145,
"step": 16500
},
{
"epoch": 5.35,
"learning_rate": 2.197860962566845e-05,
"loss": 0.14,
"step": 17000
},
{
"epoch": 5.5,
"learning_rate": 2.174268637936458e-05,
"loss": 0.1442,
"step": 17500
},
{
"epoch": 5.66,
"learning_rate": 2.150676313306071e-05,
"loss": 0.1617,
"step": 18000
},
{
"epoch": 5.82,
"learning_rate": 2.127083988675684e-05,
"loss": 0.1525,
"step": 18500
},
{
"epoch": 5.98,
"learning_rate": 2.1034916640452973e-05,
"loss": 0.159,
"step": 19000
},
{
"epoch": 6.13,
"learning_rate": 2.0798993394149106e-05,
"loss": 0.1022,
"step": 19500
},
{
"epoch": 6.29,
"learning_rate": 2.0563070147845238e-05,
"loss": 0.1082,
"step": 20000
},
{
"epoch": 6.45,
"learning_rate": 2.0327146901541364e-05,
"loss": 0.1143,
"step": 20500
},
{
"epoch": 6.61,
"learning_rate": 2.0091223655237496e-05,
"loss": 0.1268,
"step": 21000
},
{
"epoch": 6.76,
"learning_rate": 1.985530040893363e-05,
"loss": 0.1178,
"step": 21500
},
{
"epoch": 6.92,
"learning_rate": 1.9619377162629758e-05,
"loss": 0.1102,
"step": 22000
},
{
"epoch": 7.08,
"learning_rate": 1.938345391632589e-05,
"loss": 0.0948,
"step": 22500
},
{
"epoch": 7.23,
"learning_rate": 1.914753067002202e-05,
"loss": 0.0709,
"step": 23000
},
{
"epoch": 7.39,
"learning_rate": 1.891160742371815e-05,
"loss": 0.0904,
"step": 23500
},
{
"epoch": 7.55,
"learning_rate": 1.867568417741428e-05,
"loss": 0.114,
"step": 24000
},
{
"epoch": 7.71,
"learning_rate": 1.8439760931110413e-05,
"loss": 0.1083,
"step": 24500
},
{
"epoch": 7.86,
"learning_rate": 1.8203837684806546e-05,
"loss": 0.0931,
"step": 25000
},
{
"epoch": 8.02,
"learning_rate": 1.796791443850267e-05,
"loss": 0.0845,
"step": 25500
},
{
"epoch": 8.18,
"learning_rate": 1.7731991192198804e-05,
"loss": 0.0557,
"step": 26000
},
{
"epoch": 8.34,
"learning_rate": 1.7496067945894936e-05,
"loss": 0.0824,
"step": 26500
},
{
"epoch": 8.49,
"learning_rate": 1.726014469959107e-05,
"loss": 0.064,
"step": 27000
},
{
"epoch": 8.65,
"learning_rate": 1.7024221453287198e-05,
"loss": 0.076,
"step": 27500
},
{
"epoch": 8.81,
"learning_rate": 1.678829820698333e-05,
"loss": 0.0699,
"step": 28000
},
{
"epoch": 8.97,
"learning_rate": 1.655237496067946e-05,
"loss": 0.0771,
"step": 28500
},
{
"epoch": 9.12,
"learning_rate": 1.631645171437559e-05,
"loss": 0.0455,
"step": 29000
}
],
"max_steps": 63580,
"num_train_epochs": 20,
"total_flos": 1.020599417048832e+16,
"trial_name": null,
"trial_params": null
}