{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.594212016357345, "global_step": 30500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 2.764076753696131e-05, "loss": 0.6436, "step": 500 }, { "epoch": 0.31, "learning_rate": 2.5281535073922616e-05, "loss": 0.5523, "step": 1000 }, { "epoch": 0.47, "learning_rate": 2.2922302610883924e-05, "loss": 0.4916, "step": 1500 }, { "epoch": 0.63, "learning_rate": 2.0563070147845238e-05, "loss": 0.4873, "step": 2000 }, { "epoch": 0.79, "learning_rate": 1.8203837684806546e-05, "loss": 0.4573, "step": 2500 }, { "epoch": 0.94, "learning_rate": 1.5844605221767853e-05, "loss": 0.442, "step": 3000 }, { "epoch": 1.1, "learning_rate": 1.3485372758729161e-05, "loss": 0.3298, "step": 3500 }, { "epoch": 1.26, "learning_rate": 1.1126140295690468e-05, "loss": 0.2909, "step": 4000 }, { "epoch": 1.42, "learning_rate": 8.766907832651778e-06, "loss": 0.2898, "step": 4500 }, { "epoch": 1.57, "learning_rate": 6.407675369613086e-06, "loss": 0.2688, "step": 5000 }, { "epoch": 1.73, "learning_rate": 4.048442906574395e-06, "loss": 0.2815, "step": 5500 }, { "epoch": 1.89, "learning_rate": 1.6892104435357032e-06, "loss": 0.2442, "step": 6000 }, { "epoch": 2.04, "learning_rate": 2.6932997798049702e-05, "loss": 0.2957, "step": 6500 }, { "epoch": 2.2, "learning_rate": 2.669707455174583e-05, "loss": 0.2735, "step": 7000 }, { "epoch": 2.36, "learning_rate": 2.6461151305441964e-05, "loss": 0.307, "step": 7500 }, { "epoch": 2.52, "learning_rate": 2.6225228059138093e-05, "loss": 0.2875, "step": 8000 }, { "epoch": 2.67, "learning_rate": 2.5989304812834225e-05, "loss": 0.3232, "step": 8500 }, { "epoch": 2.83, "learning_rate": 2.5753381566530358e-05, "loss": 0.2979, "step": 9000 }, { "epoch": 2.99, "learning_rate": 2.5517458320226487e-05, "loss": 0.312, "step": 9500 }, { "epoch": 3.15, "learning_rate": 2.5281535073922616e-05, "loss": 0.2297, "step": 10000 }, { "epoch": 3.3, "learning_rate": 2.504561182761875e-05, "loss": 0.2251, "step": 10500 }, { "epoch": 3.46, "learning_rate": 2.480968858131488e-05, "loss": 0.2427, "step": 11000 }, { "epoch": 3.62, "learning_rate": 2.457376533501101e-05, "loss": 0.2549, "step": 11500 }, { "epoch": 3.77, "learning_rate": 2.4337842088707142e-05, "loss": 0.2566, "step": 12000 }, { "epoch": 3.93, "learning_rate": 2.410191884240327e-05, "loss": 0.2538, "step": 12500 }, { "epoch": 4.09, "learning_rate": 2.3865995596099404e-05, "loss": 0.1936, "step": 13000 }, { "epoch": 4.25, "learning_rate": 2.3630072349795533e-05, "loss": 0.1742, "step": 13500 }, { "epoch": 4.4, "learning_rate": 2.3394149103491665e-05, "loss": 0.1871, "step": 14000 }, { "epoch": 4.56, "learning_rate": 2.3158225857187798e-05, "loss": 0.1868, "step": 14500 }, { "epoch": 4.72, "learning_rate": 2.2922302610883924e-05, "loss": 0.1962, "step": 15000 }, { "epoch": 4.88, "learning_rate": 2.2686379364580056e-05, "loss": 0.2056, "step": 15500 }, { "epoch": 5.03, "learning_rate": 2.245045611827619e-05, "loss": 0.1918, "step": 16000 }, { "epoch": 5.19, "learning_rate": 2.221453287197232e-05, "loss": 0.145, "step": 16500 }, { "epoch": 5.35, "learning_rate": 2.197860962566845e-05, "loss": 0.14, "step": 17000 }, { "epoch": 5.5, "learning_rate": 2.174268637936458e-05, "loss": 0.1442, "step": 17500 }, { "epoch": 5.66, "learning_rate": 2.150676313306071e-05, "loss": 0.1617, "step": 18000 }, { "epoch": 5.82, "learning_rate": 2.127083988675684e-05, "loss": 0.1525, "step": 18500 }, { "epoch": 5.98, "learning_rate": 2.1034916640452973e-05, "loss": 0.159, "step": 19000 }, { "epoch": 6.13, "learning_rate": 2.0798993394149106e-05, "loss": 0.1022, "step": 19500 }, { "epoch": 6.29, "learning_rate": 2.0563070147845238e-05, "loss": 0.1082, "step": 20000 }, { "epoch": 6.45, "learning_rate": 2.0327146901541364e-05, "loss": 0.1143, "step": 20500 }, { "epoch": 6.61, "learning_rate": 2.0091223655237496e-05, "loss": 0.1268, "step": 21000 }, { "epoch": 6.76, "learning_rate": 1.985530040893363e-05, "loss": 0.1178, "step": 21500 }, { "epoch": 6.92, "learning_rate": 1.9619377162629758e-05, "loss": 0.1102, "step": 22000 }, { "epoch": 7.08, "learning_rate": 1.938345391632589e-05, "loss": 0.0948, "step": 22500 }, { "epoch": 7.23, "learning_rate": 1.914753067002202e-05, "loss": 0.0709, "step": 23000 }, { "epoch": 7.39, "learning_rate": 1.891160742371815e-05, "loss": 0.0904, "step": 23500 }, { "epoch": 7.55, "learning_rate": 1.867568417741428e-05, "loss": 0.114, "step": 24000 }, { "epoch": 7.71, "learning_rate": 1.8439760931110413e-05, "loss": 0.1083, "step": 24500 }, { "epoch": 7.86, "learning_rate": 1.8203837684806546e-05, "loss": 0.0931, "step": 25000 }, { "epoch": 8.02, "learning_rate": 1.796791443850267e-05, "loss": 0.0845, "step": 25500 }, { "epoch": 8.18, "learning_rate": 1.7731991192198804e-05, "loss": 0.0557, "step": 26000 }, { "epoch": 8.34, "learning_rate": 1.7496067945894936e-05, "loss": 0.0824, "step": 26500 }, { "epoch": 8.49, "learning_rate": 1.726014469959107e-05, "loss": 0.064, "step": 27000 }, { "epoch": 8.65, "learning_rate": 1.7024221453287198e-05, "loss": 0.076, "step": 27500 }, { "epoch": 8.81, "learning_rate": 1.678829820698333e-05, "loss": 0.0699, "step": 28000 }, { "epoch": 8.97, "learning_rate": 1.655237496067946e-05, "loss": 0.0771, "step": 28500 }, { "epoch": 9.12, "learning_rate": 1.631645171437559e-05, "loss": 0.0455, "step": 29000 }, { "epoch": 9.28, "learning_rate": 1.608052846807172e-05, "loss": 0.0437, "step": 29500 }, { "epoch": 9.44, "learning_rate": 1.5844605221767853e-05, "loss": 0.0667, "step": 30000 }, { "epoch": 9.59, "learning_rate": 1.5608681975463986e-05, "loss": 0.0472, "step": 30500 } ], "max_steps": 63580, "num_train_epochs": 20, "total_flos": 1.073395869196032e+16, "trial_name": null, "trial_params": null }