|
{ |
|
"best_metric": 0.9470999240875244, |
|
"best_model_checkpoint": "/kaggle/output/checkpoint-128000", |
|
"epoch": 5.541069100391134, |
|
"eval_steps": 1000, |
|
"global_step": 136000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.7777777777777777e-11, |
|
"loss": 1.05, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.7750000000000004e-08, |
|
"loss": 1.1403, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.33712574850299404, |
|
"eval_loss": 1.1030837297439575, |
|
"eval_runtime": 29.7882, |
|
"eval_samples_per_second": 168.187, |
|
"eval_steps_per_second": 21.049, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5.5527777777777784e-08, |
|
"loss": 1.1194, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.34331337325349304, |
|
"eval_loss": 1.1002165079116821, |
|
"eval_runtime": 29.8685, |
|
"eval_samples_per_second": 167.735, |
|
"eval_steps_per_second": 20.992, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.327777777777778e-08, |
|
"loss": 1.1194, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.3471057884231537, |
|
"eval_loss": 1.101266622543335, |
|
"eval_runtime": 29.709, |
|
"eval_samples_per_second": 168.636, |
|
"eval_steps_per_second": 21.105, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.1105555555555557e-07, |
|
"loss": 1.1166, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.34311377245508984, |
|
"eval_loss": 1.0984796285629272, |
|
"eval_runtime": 29.9401, |
|
"eval_samples_per_second": 167.334, |
|
"eval_steps_per_second": 20.942, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.3883333333333335e-07, |
|
"loss": 1.1137, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.3403193612774451, |
|
"eval_loss": 1.0974279642105103, |
|
"eval_runtime": 29.8185, |
|
"eval_samples_per_second": 168.016, |
|
"eval_steps_per_second": 21.027, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.6658333333333335e-07, |
|
"loss": 1.1109, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.3592814371257485, |
|
"eval_loss": 1.0984892845153809, |
|
"eval_runtime": 29.893, |
|
"eval_samples_per_second": 167.598, |
|
"eval_steps_per_second": 20.975, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9436111111111112e-07, |
|
"loss": 1.1122, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.36327345309381237, |
|
"eval_loss": 1.0960686206817627, |
|
"eval_runtime": 29.8315, |
|
"eval_samples_per_second": 167.943, |
|
"eval_steps_per_second": 21.018, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.2213888888888891e-07, |
|
"loss": 1.1091, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.35728542914171657, |
|
"eval_loss": 1.0958523750305176, |
|
"eval_runtime": 29.9494, |
|
"eval_samples_per_second": 167.282, |
|
"eval_steps_per_second": 20.935, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.4988888888888893e-07, |
|
"loss": 1.1077, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.35528942115768464, |
|
"eval_loss": 1.1003851890563965, |
|
"eval_runtime": 29.9762, |
|
"eval_samples_per_second": 167.133, |
|
"eval_steps_per_second": 20.917, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.776666666666667e-07, |
|
"loss": 1.1091, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.3790419161676647, |
|
"eval_loss": 1.0951762199401855, |
|
"eval_runtime": 29.8593, |
|
"eval_samples_per_second": 167.787, |
|
"eval_steps_per_second": 20.998, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0541666666666667e-07, |
|
"loss": 1.1047, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.3780439121756487, |
|
"eval_loss": 1.0928910970687866, |
|
"eval_runtime": 29.8768, |
|
"eval_samples_per_second": 167.689, |
|
"eval_steps_per_second": 20.986, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.3319444444444444e-07, |
|
"loss": 1.1053, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.38323353293413176, |
|
"eval_loss": 1.0910215377807617, |
|
"eval_runtime": 29.8703, |
|
"eval_samples_per_second": 167.725, |
|
"eval_steps_per_second": 20.991, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.6094444444444446e-07, |
|
"loss": 1.1035, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.34850299401197604, |
|
"eval_loss": 1.0930161476135254, |
|
"eval_runtime": 29.8865, |
|
"eval_samples_per_second": 167.634, |
|
"eval_steps_per_second": 20.979, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 3.8872222222222223e-07, |
|
"loss": 1.1002, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.3646706586826347, |
|
"eval_loss": 1.093497633934021, |
|
"eval_runtime": 30.0222, |
|
"eval_samples_per_second": 166.877, |
|
"eval_steps_per_second": 20.885, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.1647222222222225e-07, |
|
"loss": 1.1025, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.37924151696606784, |
|
"eval_loss": 1.0900779962539673, |
|
"eval_runtime": 30.0049, |
|
"eval_samples_per_second": 166.973, |
|
"eval_steps_per_second": 20.897, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.4425e-07, |
|
"loss": 1.0992, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.40119760479041916, |
|
"eval_loss": 1.0855979919433594, |
|
"eval_runtime": 30.1095, |
|
"eval_samples_per_second": 166.393, |
|
"eval_steps_per_second": 20.824, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.7200000000000004e-07, |
|
"loss": 1.0965, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.4171656686626746, |
|
"eval_loss": 1.0845381021499634, |
|
"eval_runtime": 30.0586, |
|
"eval_samples_per_second": 166.674, |
|
"eval_steps_per_second": 20.859, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.997777777777779e-07, |
|
"loss": 1.0962, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.4087824351297405, |
|
"eval_loss": 1.0860552787780762, |
|
"eval_runtime": 30.2343, |
|
"eval_samples_per_second": 165.706, |
|
"eval_steps_per_second": 20.738, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5.275277777777778e-07, |
|
"loss": 1.0936, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.4039920159680639, |
|
"eval_loss": 1.0822259187698364, |
|
"eval_runtime": 30.216, |
|
"eval_samples_per_second": 165.806, |
|
"eval_steps_per_second": 20.751, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.553055555555556e-07, |
|
"loss": 1.0923, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.4095808383233533, |
|
"eval_loss": 1.0820951461791992, |
|
"eval_runtime": 29.968, |
|
"eval_samples_per_second": 167.178, |
|
"eval_steps_per_second": 20.922, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.830555555555556e-07, |
|
"loss": 1.0897, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.4149700598802395, |
|
"eval_loss": 1.0763800144195557, |
|
"eval_runtime": 30.021, |
|
"eval_samples_per_second": 166.883, |
|
"eval_steps_per_second": 20.885, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.108333333333333e-07, |
|
"loss": 1.0888, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.43193612774451096, |
|
"eval_loss": 1.0687414407730103, |
|
"eval_runtime": 30.1577, |
|
"eval_samples_per_second": 166.127, |
|
"eval_steps_per_second": 20.791, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.385833333333334e-07, |
|
"loss": 1.0823, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.4285429141716567, |
|
"eval_loss": 1.0642070770263672, |
|
"eval_runtime": 30.0701, |
|
"eval_samples_per_second": 166.611, |
|
"eval_steps_per_second": 20.851, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.663611111111112e-07, |
|
"loss": 1.0786, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.4363273453093812, |
|
"eval_loss": 1.0598841905593872, |
|
"eval_runtime": 30.0689, |
|
"eval_samples_per_second": 166.617, |
|
"eval_steps_per_second": 20.852, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 6.941111111111112e-07, |
|
"loss": 1.0708, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_accuracy": 0.4429141716566866, |
|
"eval_loss": 1.0561795234680176, |
|
"eval_runtime": 30.1301, |
|
"eval_samples_per_second": 166.279, |
|
"eval_steps_per_second": 20.81, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 7.218888888888889e-07, |
|
"loss": 1.072, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_accuracy": 0.4303393213572854, |
|
"eval_loss": 1.0574887990951538, |
|
"eval_runtime": 30.0473, |
|
"eval_samples_per_second": 166.737, |
|
"eval_steps_per_second": 20.867, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 7.496666666666667e-07, |
|
"loss": 1.0701, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.4421157684630739, |
|
"eval_loss": 1.055459976196289, |
|
"eval_runtime": 30.1049, |
|
"eval_samples_per_second": 166.418, |
|
"eval_steps_per_second": 20.827, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 7.774166666666668e-07, |
|
"loss": 1.0677, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_accuracy": 0.4285429141716567, |
|
"eval_loss": 1.0527067184448242, |
|
"eval_runtime": 30.1476, |
|
"eval_samples_per_second": 166.182, |
|
"eval_steps_per_second": 20.798, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.051944444444445e-07, |
|
"loss": 1.065, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.43852295409181635, |
|
"eval_loss": 1.0494154691696167, |
|
"eval_runtime": 30.036, |
|
"eval_samples_per_second": 166.8, |
|
"eval_steps_per_second": 20.875, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 8.329444444444445e-07, |
|
"loss": 1.0628, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.4377245508982036, |
|
"eval_loss": 1.0461777448654175, |
|
"eval_runtime": 30.1005, |
|
"eval_samples_per_second": 166.442, |
|
"eval_steps_per_second": 20.83, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 8.607222222222223e-07, |
|
"loss": 1.0672, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_accuracy": 0.4435129740518962, |
|
"eval_loss": 1.0531694889068604, |
|
"eval_runtime": 30.1733, |
|
"eval_samples_per_second": 166.041, |
|
"eval_steps_per_second": 20.78, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 8.884722222222224e-07, |
|
"loss": 1.0638, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_accuracy": 0.4297405189620758, |
|
"eval_loss": 1.0501775741577148, |
|
"eval_runtime": 30.058, |
|
"eval_samples_per_second": 166.678, |
|
"eval_steps_per_second": 20.86, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 9.162500000000001e-07, |
|
"loss": 1.0637, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_accuracy": 0.44491017964071855, |
|
"eval_loss": 1.0452704429626465, |
|
"eval_runtime": 30.3623, |
|
"eval_samples_per_second": 165.007, |
|
"eval_steps_per_second": 20.651, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 9.440000000000001e-07, |
|
"loss": 1.0558, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_accuracy": 0.44510978043912175, |
|
"eval_loss": 1.0434744358062744, |
|
"eval_runtime": 30.2239, |
|
"eval_samples_per_second": 165.763, |
|
"eval_steps_per_second": 20.745, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.71777777777778e-07, |
|
"loss": 1.0598, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.4473053892215569, |
|
"eval_loss": 1.039801001548767, |
|
"eval_runtime": 30.107, |
|
"eval_samples_per_second": 166.407, |
|
"eval_steps_per_second": 20.826, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 9.995277777777778e-07, |
|
"loss": 1.0601, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.4469061876247505, |
|
"eval_loss": 1.037074327468872, |
|
"eval_runtime": 30.1954, |
|
"eval_samples_per_second": 165.919, |
|
"eval_steps_per_second": 20.765, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.0273055555555556e-06, |
|
"loss": 1.0529, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_accuracy": 0.45229540918163674, |
|
"eval_loss": 1.0346434116363525, |
|
"eval_runtime": 30.2856, |
|
"eval_samples_per_second": 165.425, |
|
"eval_steps_per_second": 20.703, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.0550555555555557e-06, |
|
"loss": 1.0544, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_accuracy": 0.45129740518962075, |
|
"eval_loss": 1.0338674783706665, |
|
"eval_runtime": 30.1511, |
|
"eval_samples_per_second": 166.163, |
|
"eval_steps_per_second": 20.795, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.0828333333333334e-06, |
|
"loss": 1.0528, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_accuracy": 0.44550898203592815, |
|
"eval_loss": 1.0376836061477661, |
|
"eval_runtime": 30.304, |
|
"eval_samples_per_second": 165.324, |
|
"eval_steps_per_second": 20.69, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.1106111111111112e-06, |
|
"loss": 1.0482, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.4540918163672655, |
|
"eval_loss": 1.031366229057312, |
|
"eval_runtime": 30.2062, |
|
"eval_samples_per_second": 165.86, |
|
"eval_steps_per_second": 20.757, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.1383611111111113e-06, |
|
"loss": 1.0494, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_accuracy": 0.4552894211576846, |
|
"eval_loss": 1.032992959022522, |
|
"eval_runtime": 30.1667, |
|
"eval_samples_per_second": 166.077, |
|
"eval_steps_per_second": 20.784, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.166138888888889e-06, |
|
"loss": 1.0474, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_accuracy": 0.45129740518962075, |
|
"eval_loss": 1.0255250930786133, |
|
"eval_runtime": 30.2122, |
|
"eval_samples_per_second": 165.827, |
|
"eval_steps_per_second": 20.753, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.193888888888889e-06, |
|
"loss": 1.0448, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_accuracy": 0.4483033932135729, |
|
"eval_loss": 1.0412585735321045, |
|
"eval_runtime": 30.2083, |
|
"eval_samples_per_second": 165.849, |
|
"eval_steps_per_second": 20.756, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.2216666666666667e-06, |
|
"loss": 1.0458, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_accuracy": 0.46427145708582834, |
|
"eval_loss": 1.0211690664291382, |
|
"eval_runtime": 30.4051, |
|
"eval_samples_per_second": 164.775, |
|
"eval_steps_per_second": 20.622, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.2494166666666668e-06, |
|
"loss": 1.0468, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 0.47105788423153694, |
|
"eval_loss": 1.0185887813568115, |
|
"eval_runtime": 30.1465, |
|
"eval_samples_per_second": 166.188, |
|
"eval_steps_per_second": 20.798, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.2771944444444445e-06, |
|
"loss": 1.0414, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.46966067864271455, |
|
"eval_loss": 1.0215678215026855, |
|
"eval_runtime": 30.1615, |
|
"eval_samples_per_second": 166.106, |
|
"eval_steps_per_second": 20.788, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.3049444444444446e-06, |
|
"loss": 1.0374, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_accuracy": 0.47684630738522954, |
|
"eval_loss": 1.0100239515304565, |
|
"eval_runtime": 30.0919, |
|
"eval_samples_per_second": 166.49, |
|
"eval_steps_per_second": 20.836, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.3327222222222224e-06, |
|
"loss": 1.0423, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_accuracy": 0.462874251497006, |
|
"eval_loss": 1.0277544260025024, |
|
"eval_runtime": 30.0926, |
|
"eval_samples_per_second": 166.486, |
|
"eval_steps_per_second": 20.836, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.3604722222222224e-06, |
|
"loss": 1.0332, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4870259481037924, |
|
"eval_loss": 1.0046255588531494, |
|
"eval_runtime": 30.0835, |
|
"eval_samples_per_second": 166.536, |
|
"eval_steps_per_second": 20.842, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.3882500000000002e-06, |
|
"loss": 1.0337, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_accuracy": 0.48542914171656687, |
|
"eval_loss": 1.00551438331604, |
|
"eval_runtime": 30.1523, |
|
"eval_samples_per_second": 166.157, |
|
"eval_steps_per_second": 20.794, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.416e-06, |
|
"loss": 1.0335, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_accuracy": 0.4782435129740519, |
|
"eval_loss": 1.0022392272949219, |
|
"eval_runtime": 30.0244, |
|
"eval_samples_per_second": 166.864, |
|
"eval_steps_per_second": 20.883, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.4437777777777779e-06, |
|
"loss": 1.0272, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_accuracy": 0.48003992015968067, |
|
"eval_loss": 1.014728307723999, |
|
"eval_runtime": 30.0497, |
|
"eval_samples_per_second": 166.724, |
|
"eval_steps_per_second": 20.865, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.471527777777778e-06, |
|
"loss": 1.0315, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_accuracy": 0.4848303393213573, |
|
"eval_loss": 0.9981946349143982, |
|
"eval_runtime": 30.2291, |
|
"eval_samples_per_second": 165.734, |
|
"eval_steps_per_second": 20.742, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4993055555555557e-06, |
|
"loss": 1.0291, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.49021956087824353, |
|
"eval_loss": 0.9988633990287781, |
|
"eval_runtime": 30.0961, |
|
"eval_samples_per_second": 166.467, |
|
"eval_steps_per_second": 20.833, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.5270555555555558e-06, |
|
"loss": 1.027, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_accuracy": 0.48602794411177647, |
|
"eval_loss": 0.99713534116745, |
|
"eval_runtime": 30.1599, |
|
"eval_samples_per_second": 166.115, |
|
"eval_steps_per_second": 20.789, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.5548333333333335e-06, |
|
"loss": 1.0215, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_accuracy": 0.46487025948103794, |
|
"eval_loss": 1.0212756395339966, |
|
"eval_runtime": 30.2641, |
|
"eval_samples_per_second": 165.543, |
|
"eval_steps_per_second": 20.718, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.5825833333333334e-06, |
|
"loss": 1.0203, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_accuracy": 0.4874251497005988, |
|
"eval_loss": 0.9981949925422668, |
|
"eval_runtime": 30.1111, |
|
"eval_samples_per_second": 166.384, |
|
"eval_steps_per_second": 20.823, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.6103611111111112e-06, |
|
"loss": 1.0238, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.48562874251497007, |
|
"eval_loss": 1.0043253898620605, |
|
"eval_runtime": 30.4057, |
|
"eval_samples_per_second": 164.771, |
|
"eval_steps_per_second": 20.621, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.638111111111111e-06, |
|
"loss": 1.0211, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_accuracy": 0.49820359281437127, |
|
"eval_loss": 0.9920447468757629, |
|
"eval_runtime": 30.4581, |
|
"eval_samples_per_second": 164.488, |
|
"eval_steps_per_second": 20.586, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.665888888888889e-06, |
|
"loss": 1.0236, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_accuracy": 0.49241516966067866, |
|
"eval_loss": 0.9953387975692749, |
|
"eval_runtime": 30.3298, |
|
"eval_samples_per_second": 165.184, |
|
"eval_steps_per_second": 20.673, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.693638888888889e-06, |
|
"loss": 1.0207, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_accuracy": 0.49760479041916167, |
|
"eval_loss": 0.9943307042121887, |
|
"eval_runtime": 30.3886, |
|
"eval_samples_per_second": 164.865, |
|
"eval_steps_per_second": 20.633, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.7214166666666666e-06, |
|
"loss": 1.0222, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.4880239520958084, |
|
"eval_loss": 1.005075454711914, |
|
"eval_runtime": 30.41, |
|
"eval_samples_per_second": 164.748, |
|
"eval_steps_per_second": 20.618, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.7491666666666667e-06, |
|
"loss": 1.0221, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_accuracy": 0.5011976047904192, |
|
"eval_loss": 0.9902356266975403, |
|
"eval_runtime": 30.3384, |
|
"eval_samples_per_second": 165.137, |
|
"eval_steps_per_second": 20.667, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.7769444444444447e-06, |
|
"loss": 1.0188, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_accuracy": 0.5003992015968064, |
|
"eval_loss": 0.9907160997390747, |
|
"eval_runtime": 30.3592, |
|
"eval_samples_per_second": 165.024, |
|
"eval_steps_per_second": 20.653, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.8046944444444446e-06, |
|
"loss": 1.0187, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_accuracy": 0.49820359281437127, |
|
"eval_loss": 0.9914441704750061, |
|
"eval_runtime": 30.3614, |
|
"eval_samples_per_second": 165.012, |
|
"eval_steps_per_second": 20.651, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.8324722222222223e-06, |
|
"loss": 1.023, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_accuracy": 0.5007984031936128, |
|
"eval_loss": 0.9877445101737976, |
|
"eval_runtime": 30.341, |
|
"eval_samples_per_second": 165.123, |
|
"eval_steps_per_second": 20.665, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.8602222222222222e-06, |
|
"loss": 1.0195, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_accuracy": 0.49660678642714573, |
|
"eval_loss": 0.9916561245918274, |
|
"eval_runtime": 30.3344, |
|
"eval_samples_per_second": 165.159, |
|
"eval_steps_per_second": 20.67, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.8880000000000002e-06, |
|
"loss": 1.0181, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_accuracy": 0.4992015968063872, |
|
"eval_loss": 0.985427975654602, |
|
"eval_runtime": 30.3261, |
|
"eval_samples_per_second": 165.204, |
|
"eval_steps_per_second": 20.675, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.9157500000000003e-06, |
|
"loss": 1.0183, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_accuracy": 0.5033932135728543, |
|
"eval_loss": 0.9862416982650757, |
|
"eval_runtime": 30.3869, |
|
"eval_samples_per_second": 164.874, |
|
"eval_steps_per_second": 20.634, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.943527777777778e-06, |
|
"loss": 1.0176, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_accuracy": 0.501996007984032, |
|
"eval_loss": 0.9887968897819519, |
|
"eval_runtime": 30.3292, |
|
"eval_samples_per_second": 165.187, |
|
"eval_steps_per_second": 20.673, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.9712777777777777e-06, |
|
"loss": 1.0108, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_accuracy": 0.49520958083832334, |
|
"eval_loss": 0.992777407169342, |
|
"eval_runtime": 30.4, |
|
"eval_samples_per_second": 164.803, |
|
"eval_steps_per_second": 20.625, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.9990555555555557e-06, |
|
"loss": 1.0172, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_accuracy": 0.49141716566866267, |
|
"eval_loss": 0.9982264041900635, |
|
"eval_runtime": 30.36, |
|
"eval_samples_per_second": 165.02, |
|
"eval_steps_per_second": 20.652, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.026805555555556e-06, |
|
"loss": 1.0151, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.49720558882235527, |
|
"eval_loss": 0.9918563365936279, |
|
"eval_runtime": 30.3051, |
|
"eval_samples_per_second": 165.318, |
|
"eval_steps_per_second": 20.69, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 2.0545833333333335e-06, |
|
"loss": 1.0129, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_accuracy": 0.49840319361277446, |
|
"eval_loss": 0.9832558631896973, |
|
"eval_runtime": 30.3597, |
|
"eval_samples_per_second": 165.022, |
|
"eval_steps_per_second": 20.652, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 2.0823611111111115e-06, |
|
"loss": 1.006, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"eval_accuracy": 0.5033932135728543, |
|
"eval_loss": 0.9880185127258301, |
|
"eval_runtime": 30.2854, |
|
"eval_samples_per_second": 165.426, |
|
"eval_steps_per_second": 20.703, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 2.1101111111111113e-06, |
|
"loss": 1.0069, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.5045908183632735, |
|
"eval_loss": 0.9803372025489807, |
|
"eval_runtime": 30.3285, |
|
"eval_samples_per_second": 165.191, |
|
"eval_steps_per_second": 20.674, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 2.137888888888889e-06, |
|
"loss": 1.0106, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_accuracy": 0.47724550898203594, |
|
"eval_loss": 1.0122599601745605, |
|
"eval_runtime": 30.3162, |
|
"eval_samples_per_second": 165.258, |
|
"eval_steps_per_second": 20.682, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 2.1656388888888888e-06, |
|
"loss": 1.0019, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_accuracy": 0.5045908183632735, |
|
"eval_loss": 0.9800828099250793, |
|
"eval_runtime": 30.2724, |
|
"eval_samples_per_second": 165.497, |
|
"eval_steps_per_second": 20.712, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 2.1934166666666667e-06, |
|
"loss": 1.0093, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_accuracy": 0.493812375249501, |
|
"eval_loss": 0.9990217089653015, |
|
"eval_runtime": 30.2198, |
|
"eval_samples_per_second": 165.785, |
|
"eval_steps_per_second": 20.748, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 2.221166666666667e-06, |
|
"loss": 1.0039, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_accuracy": 0.5067864271457085, |
|
"eval_loss": 0.98616623878479, |
|
"eval_runtime": 30.3059, |
|
"eval_samples_per_second": 165.315, |
|
"eval_steps_per_second": 20.689, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 2.2489444444444446e-06, |
|
"loss": 1.0062, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_accuracy": 0.5013972055888224, |
|
"eval_loss": 0.9929732084274292, |
|
"eval_runtime": 30.2754, |
|
"eval_samples_per_second": 165.481, |
|
"eval_steps_per_second": 20.71, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 2.2766944444444444e-06, |
|
"loss": 1.0145, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_accuracy": 0.5051896207584831, |
|
"eval_loss": 0.9891365766525269, |
|
"eval_runtime": 30.204, |
|
"eval_samples_per_second": 165.872, |
|
"eval_steps_per_second": 20.759, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 2.3044722222222224e-06, |
|
"loss": 1.0012, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_accuracy": 0.5067864271457085, |
|
"eval_loss": 0.984894871711731, |
|
"eval_runtime": 30.2196, |
|
"eval_samples_per_second": 165.787, |
|
"eval_steps_per_second": 20.748, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 2.3322222222222223e-06, |
|
"loss": 1.0072, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_accuracy": 0.5075848303393213, |
|
"eval_loss": 0.979631781578064, |
|
"eval_runtime": 30.1636, |
|
"eval_samples_per_second": 166.094, |
|
"eval_steps_per_second": 20.787, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 2.36e-06, |
|
"loss": 1.0038, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"eval_accuracy": 0.5035928143712575, |
|
"eval_loss": 0.9814818501472473, |
|
"eval_runtime": 30.2922, |
|
"eval_samples_per_second": 165.389, |
|
"eval_steps_per_second": 20.698, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 2.38775e-06, |
|
"loss": 1.0107, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_accuracy": 0.5117764471057884, |
|
"eval_loss": 0.9743403792381287, |
|
"eval_runtime": 30.2926, |
|
"eval_samples_per_second": 165.387, |
|
"eval_steps_per_second": 20.698, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.415527777777778e-06, |
|
"loss": 1.0011, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_accuracy": 0.5073852295409181, |
|
"eval_loss": 0.9760627150535583, |
|
"eval_runtime": 30.3925, |
|
"eval_samples_per_second": 164.843, |
|
"eval_steps_per_second": 20.63, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.443277777777778e-06, |
|
"loss": 1.0045, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_accuracy": 0.5101796407185629, |
|
"eval_loss": 0.9735654592514038, |
|
"eval_runtime": 30.2088, |
|
"eval_samples_per_second": 165.846, |
|
"eval_steps_per_second": 20.756, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 2.4710555555555555e-06, |
|
"loss": 0.999, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"eval_accuracy": 0.4996007984031936, |
|
"eval_loss": 0.9788551926612854, |
|
"eval_runtime": 30.2584, |
|
"eval_samples_per_second": 165.574, |
|
"eval_steps_per_second": 20.721, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 2.498805555555556e-06, |
|
"loss": 1.0075, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_accuracy": 0.5095808383233533, |
|
"eval_loss": 0.972398579120636, |
|
"eval_runtime": 30.1697, |
|
"eval_samples_per_second": 166.061, |
|
"eval_steps_per_second": 20.782, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.5265833333333334e-06, |
|
"loss": 1.0085, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_accuracy": 0.5073852295409181, |
|
"eval_loss": 0.9791931509971619, |
|
"eval_runtime": 30.1687, |
|
"eval_samples_per_second": 166.066, |
|
"eval_steps_per_second": 20.783, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 2.5543333333333337e-06, |
|
"loss": 1.0022, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_accuracy": 0.5105788423153692, |
|
"eval_loss": 0.9835652112960815, |
|
"eval_runtime": 30.389, |
|
"eval_samples_per_second": 164.862, |
|
"eval_steps_per_second": 20.632, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 2.5821111111111112e-06, |
|
"loss": 1.006, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_accuracy": 0.5127744510978044, |
|
"eval_loss": 0.9716958403587341, |
|
"eval_runtime": 30.2822, |
|
"eval_samples_per_second": 165.444, |
|
"eval_steps_per_second": 20.705, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2.609861111111111e-06, |
|
"loss": 1.0001, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"eval_accuracy": 0.49880239520958086, |
|
"eval_loss": 0.982902467250824, |
|
"eval_runtime": 30.3141, |
|
"eval_samples_per_second": 165.27, |
|
"eval_steps_per_second": 20.683, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.637638888888889e-06, |
|
"loss": 1.0018, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_accuracy": 0.5117764471057884, |
|
"eval_loss": 0.9711886644363403, |
|
"eval_runtime": 30.3419, |
|
"eval_samples_per_second": 165.118, |
|
"eval_steps_per_second": 20.665, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.6653888888888894e-06, |
|
"loss": 1.001, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_accuracy": 0.5085828343313373, |
|
"eval_loss": 0.9692357182502747, |
|
"eval_runtime": 30.2793, |
|
"eval_samples_per_second": 165.46, |
|
"eval_steps_per_second": 20.707, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 2.693166666666667e-06, |
|
"loss": 0.9967, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_accuracy": 0.5035928143712575, |
|
"eval_loss": 0.9811190366744995, |
|
"eval_runtime": 30.3522, |
|
"eval_samples_per_second": 165.062, |
|
"eval_steps_per_second": 20.657, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 2.7209166666666668e-06, |
|
"loss": 0.9963, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_accuracy": 0.5117764471057884, |
|
"eval_loss": 0.9744167923927307, |
|
"eval_runtime": 30.2586, |
|
"eval_samples_per_second": 165.573, |
|
"eval_steps_per_second": 20.721, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 2.7486944444444448e-06, |
|
"loss": 0.9923, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_accuracy": 0.5155688622754491, |
|
"eval_loss": 0.9705401659011841, |
|
"eval_runtime": 30.3652, |
|
"eval_samples_per_second": 164.992, |
|
"eval_steps_per_second": 20.649, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.7764444444444446e-06, |
|
"loss": 0.9925, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"eval_accuracy": 0.5161676646706587, |
|
"eval_loss": 0.967511773109436, |
|
"eval_runtime": 30.3989, |
|
"eval_samples_per_second": 164.809, |
|
"eval_steps_per_second": 20.626, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 2.804222222222222e-06, |
|
"loss": 0.9965, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"eval_accuracy": 0.5179640718562875, |
|
"eval_loss": 0.9703009724617004, |
|
"eval_runtime": 30.306, |
|
"eval_samples_per_second": 165.314, |
|
"eval_steps_per_second": 20.689, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 2.8319722222222225e-06, |
|
"loss": 0.994, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"eval_accuracy": 0.5027944111776447, |
|
"eval_loss": 0.9799131155014038, |
|
"eval_runtime": 30.4057, |
|
"eval_samples_per_second": 164.772, |
|
"eval_steps_per_second": 20.621, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 2.8597500000000004e-06, |
|
"loss": 0.9989, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_accuracy": 0.5175648702594811, |
|
"eval_loss": 0.965115487575531, |
|
"eval_runtime": 30.5041, |
|
"eval_samples_per_second": 164.24, |
|
"eval_steps_per_second": 20.555, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.8875000000000003e-06, |
|
"loss": 0.9909, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_accuracy": 0.5005988023952096, |
|
"eval_loss": 0.9864978790283203, |
|
"eval_runtime": 30.393, |
|
"eval_samples_per_second": 164.84, |
|
"eval_steps_per_second": 20.63, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 2.915277777777778e-06, |
|
"loss": 0.9873, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_accuracy": 0.5017964071856288, |
|
"eval_loss": 0.9913762211799622, |
|
"eval_runtime": 30.3351, |
|
"eval_samples_per_second": 165.155, |
|
"eval_steps_per_second": 20.669, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.9430277777777777e-06, |
|
"loss": 0.9826, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"eval_accuracy": 0.5201596806387225, |
|
"eval_loss": 0.9676294922828674, |
|
"eval_runtime": 30.2604, |
|
"eval_samples_per_second": 165.563, |
|
"eval_steps_per_second": 20.72, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.9708055555555557e-06, |
|
"loss": 0.9832, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"eval_accuracy": 0.5173652694610779, |
|
"eval_loss": 0.9818444848060608, |
|
"eval_runtime": 30.3828, |
|
"eval_samples_per_second": 164.896, |
|
"eval_steps_per_second": 20.637, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 2.998555555555556e-06, |
|
"loss": 0.984, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_accuracy": 0.511377245508982, |
|
"eval_loss": 0.9720831513404846, |
|
"eval_runtime": 30.2651, |
|
"eval_samples_per_second": 165.537, |
|
"eval_steps_per_second": 20.717, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 3.0263333333333336e-06, |
|
"loss": 0.982, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_accuracy": 0.5209580838323353, |
|
"eval_loss": 0.9712573885917664, |
|
"eval_runtime": 30.204, |
|
"eval_samples_per_second": 165.872, |
|
"eval_steps_per_second": 20.759, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 3.0540833333333334e-06, |
|
"loss": 0.9876, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"eval_accuracy": 0.5265469061876248, |
|
"eval_loss": 0.9630448222160339, |
|
"eval_runtime": 30.2241, |
|
"eval_samples_per_second": 165.762, |
|
"eval_steps_per_second": 20.745, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 3.0818611111111114e-06, |
|
"loss": 0.9826, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"eval_accuracy": 0.524750499001996, |
|
"eval_loss": 0.9607908129692078, |
|
"eval_runtime": 30.5687, |
|
"eval_samples_per_second": 163.893, |
|
"eval_steps_per_second": 20.511, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 3.1096111111111113e-06, |
|
"loss": 0.9922, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_accuracy": 0.5295409181636727, |
|
"eval_loss": 0.9560614824295044, |
|
"eval_runtime": 30.2441, |
|
"eval_samples_per_second": 165.652, |
|
"eval_steps_per_second": 20.731, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 3.137388888888889e-06, |
|
"loss": 0.9784, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"eval_accuracy": 0.5245508982035928, |
|
"eval_loss": 0.9655715823173523, |
|
"eval_runtime": 30.341, |
|
"eval_samples_per_second": 165.123, |
|
"eval_steps_per_second": 20.665, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 3.165138888888889e-06, |
|
"loss": 0.9819, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"eval_accuracy": 0.5255489021956088, |
|
"eval_loss": 0.9573906660079956, |
|
"eval_runtime": 30.3563, |
|
"eval_samples_per_second": 165.04, |
|
"eval_steps_per_second": 20.655, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 3.192916666666667e-06, |
|
"loss": 0.9829, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"eval_accuracy": 0.5325349301397205, |
|
"eval_loss": 0.9549714922904968, |
|
"eval_runtime": 30.4521, |
|
"eval_samples_per_second": 164.521, |
|
"eval_steps_per_second": 20.59, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 3.220666666666667e-06, |
|
"loss": 0.9783, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"eval_accuracy": 0.5281437125748503, |
|
"eval_loss": 0.9588763117790222, |
|
"eval_runtime": 30.2962, |
|
"eval_samples_per_second": 165.368, |
|
"eval_steps_per_second": 20.696, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 3.248444444444445e-06, |
|
"loss": 0.9843, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"eval_accuracy": 0.5281437125748503, |
|
"eval_loss": 0.9612971544265747, |
|
"eval_runtime": 30.4047, |
|
"eval_samples_per_second": 164.777, |
|
"eval_steps_per_second": 20.622, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 3.276194444444445e-06, |
|
"loss": 0.9856, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_accuracy": 0.5283433133732535, |
|
"eval_loss": 0.9553202986717224, |
|
"eval_runtime": 30.3493, |
|
"eval_samples_per_second": 165.078, |
|
"eval_steps_per_second": 20.659, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 3.3039722222222223e-06, |
|
"loss": 0.9788, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_accuracy": 0.5363273453093812, |
|
"eval_loss": 0.9543020129203796, |
|
"eval_runtime": 30.4457, |
|
"eval_samples_per_second": 164.555, |
|
"eval_steps_per_second": 20.594, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 3.331694444444445e-06, |
|
"loss": 0.9832, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"eval_accuracy": 0.5127744510978044, |
|
"eval_loss": 0.9705111384391785, |
|
"eval_runtime": 30.2656, |
|
"eval_samples_per_second": 165.534, |
|
"eval_steps_per_second": 20.717, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 3.359472222222222e-06, |
|
"loss": 0.9753, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"eval_accuracy": 0.5327345309381237, |
|
"eval_loss": 0.9542443156242371, |
|
"eval_runtime": 30.3153, |
|
"eval_samples_per_second": 165.263, |
|
"eval_steps_per_second": 20.683, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 3.38725e-06, |
|
"loss": 0.9784, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"eval_accuracy": 0.5367265469061876, |
|
"eval_loss": 0.951139509677887, |
|
"eval_runtime": 30.2562, |
|
"eval_samples_per_second": 165.586, |
|
"eval_steps_per_second": 20.723, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 3.415027777777778e-06, |
|
"loss": 0.9781, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"eval_accuracy": 0.5171656686626747, |
|
"eval_loss": 0.9652427434921265, |
|
"eval_runtime": 30.3026, |
|
"eval_samples_per_second": 165.332, |
|
"eval_steps_per_second": 20.691, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 3.442777777777778e-06, |
|
"loss": 0.9649, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_accuracy": 0.539121756487026, |
|
"eval_loss": 0.9554200172424316, |
|
"eval_runtime": 30.2878, |
|
"eval_samples_per_second": 165.413, |
|
"eval_steps_per_second": 20.701, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 3.470555555555556e-06, |
|
"loss": 0.9729, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"eval_accuracy": 0.5329341317365269, |
|
"eval_loss": 0.9545565843582153, |
|
"eval_runtime": 30.3453, |
|
"eval_samples_per_second": 165.099, |
|
"eval_steps_per_second": 20.662, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 3.498305555555556e-06, |
|
"loss": 0.9701, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_accuracy": 0.5331337325349301, |
|
"eval_loss": 0.9543370008468628, |
|
"eval_runtime": 30.313, |
|
"eval_samples_per_second": 165.276, |
|
"eval_steps_per_second": 20.684, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 3.5260833333333333e-06, |
|
"loss": 0.9697, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"eval_accuracy": 0.5297405189620759, |
|
"eval_loss": 0.9533666968345642, |
|
"eval_runtime": 30.3639, |
|
"eval_samples_per_second": 164.999, |
|
"eval_steps_per_second": 20.65, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 3.553833333333333e-06, |
|
"loss": 0.9601, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_accuracy": 0.5373253493013972, |
|
"eval_loss": 0.9470999240875244, |
|
"eval_runtime": 30.3245, |
|
"eval_samples_per_second": 165.213, |
|
"eval_steps_per_second": 20.676, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 3.581611111111111e-06, |
|
"loss": 0.9665, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"eval_accuracy": 0.5333333333333333, |
|
"eval_loss": 0.9526985883712769, |
|
"eval_runtime": 30.1783, |
|
"eval_samples_per_second": 166.013, |
|
"eval_steps_per_second": 20.777, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 3.6093611111111114e-06, |
|
"loss": 0.9651, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"eval_accuracy": 0.5269461077844312, |
|
"eval_loss": 0.9613257050514221, |
|
"eval_runtime": 30.176, |
|
"eval_samples_per_second": 166.026, |
|
"eval_steps_per_second": 20.778, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 3.6371388888888894e-06, |
|
"loss": 0.9609, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"eval_accuracy": 0.5333333333333333, |
|
"eval_loss": 0.9485881924629211, |
|
"eval_runtime": 30.2945, |
|
"eval_samples_per_second": 165.377, |
|
"eval_steps_per_second": 20.697, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 3.6648888888888893e-06, |
|
"loss": 0.9592, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"eval_accuracy": 0.5409181636726547, |
|
"eval_loss": 0.9521782398223877, |
|
"eval_runtime": 30.2884, |
|
"eval_samples_per_second": 165.41, |
|
"eval_steps_per_second": 20.701, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 3.6926666666666673e-06, |
|
"loss": 0.9719, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_accuracy": 0.5317365269461077, |
|
"eval_loss": 0.9502424597740173, |
|
"eval_runtime": 30.2386, |
|
"eval_samples_per_second": 165.682, |
|
"eval_steps_per_second": 20.735, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 3.720416666666667e-06, |
|
"loss": 0.9679, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_accuracy": 0.5395209580838324, |
|
"eval_loss": 0.9553654193878174, |
|
"eval_runtime": 30.2758, |
|
"eval_samples_per_second": 165.479, |
|
"eval_steps_per_second": 20.71, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 3.7481944444444443e-06, |
|
"loss": 0.9705, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"eval_accuracy": 0.5269461077844312, |
|
"eval_loss": 0.9593011140823364, |
|
"eval_runtime": 30.5759, |
|
"eval_samples_per_second": 163.855, |
|
"eval_steps_per_second": 20.506, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 3.7759444444444445e-06, |
|
"loss": 0.9666, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"eval_accuracy": 0.53812375249501, |
|
"eval_loss": 0.9483373165130615, |
|
"eval_runtime": 30.2907, |
|
"eval_samples_per_second": 165.398, |
|
"eval_steps_per_second": 20.699, |
|
"step": 136000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 10000000, |
|
"num_train_epochs": 408, |
|
"save_steps": 1000, |
|
"total_flos": 1.4214734598242304e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|