|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"global_step": 33000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.969939393939394e-05, |
|
"loss": 2.4249, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.9396363636363637e-05, |
|
"loss": 2.3742, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.9093939393939395e-05, |
|
"loss": 2.3454, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.8790909090909093e-05, |
|
"loss": 2.3292, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.4535229206085205, |
|
"eval_runtime": 1.8377, |
|
"eval_samples_per_second": 136.041, |
|
"eval_steps_per_second": 22.855, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.848787878787879e-05, |
|
"loss": 2.1317, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.8184848484848487e-05, |
|
"loss": 2.0183, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.788181818181818e-05, |
|
"loss": 2.0052, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.757878787878788e-05, |
|
"loss": 1.9885, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.4263100624084473, |
|
"eval_runtime": 1.8704, |
|
"eval_samples_per_second": 133.662, |
|
"eval_steps_per_second": 22.455, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.727575757575758e-05, |
|
"loss": 1.9505, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.6972727272727273e-05, |
|
"loss": 1.7025, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.6669696969696972e-05, |
|
"loss": 1.717, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.636727272727273e-05, |
|
"loss": 1.7028, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.6064242424242428e-05, |
|
"loss": 1.741, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.508474111557007, |
|
"eval_runtime": 1.8606, |
|
"eval_samples_per_second": 134.366, |
|
"eval_steps_per_second": 22.573, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.5761212121212123e-05, |
|
"loss": 1.4946, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.5458181818181818e-05, |
|
"loss": 1.4504, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.5155151515151516e-05, |
|
"loss": 1.4769, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 1.4852121212121213e-05, |
|
"loss": 1.4818, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.5952579975128174, |
|
"eval_runtime": 1.911, |
|
"eval_samples_per_second": 130.825, |
|
"eval_steps_per_second": 21.979, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 1.4549090909090911e-05, |
|
"loss": 1.3898, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 1.4246666666666669e-05, |
|
"loss": 1.218, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 1.3943636363636365e-05, |
|
"loss": 1.2481, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 1.364060606060606e-05, |
|
"loss": 1.2518, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.3337575757575759e-05, |
|
"loss": 1.2692, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.763364791870117, |
|
"eval_runtime": 1.8787, |
|
"eval_samples_per_second": 133.072, |
|
"eval_steps_per_second": 22.356, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 1.3035151515151516e-05, |
|
"loss": 1.0289, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 1.2732727272727275e-05, |
|
"loss": 1.0458, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 1.2429696969696972e-05, |
|
"loss": 1.0494, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 1.2126666666666667e-05, |
|
"loss": 1.057, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.8617300987243652, |
|
"eval_runtime": 1.8519, |
|
"eval_samples_per_second": 135.0, |
|
"eval_steps_per_second": 22.68, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 1.1823636363636364e-05, |
|
"loss": 0.9342, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 1.152060606060606e-05, |
|
"loss": 0.8533, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 1.1217575757575759e-05, |
|
"loss": 0.8949, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 1.0914545454545456e-05, |
|
"loss": 0.8928, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 3.067075729370117, |
|
"eval_runtime": 1.8518, |
|
"eval_samples_per_second": 135.007, |
|
"eval_steps_per_second": 22.681, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 1.0611515151515152e-05, |
|
"loss": 0.8587, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 1.030848484848485e-05, |
|
"loss": 0.7187, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 1.0006060606060606e-05, |
|
"loss": 0.7212, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 9.703030303030305e-06, |
|
"loss": 0.7395, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.758, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 3.21189546585083, |
|
"eval_runtime": 1.8755, |
|
"eval_samples_per_second": 133.301, |
|
"eval_steps_per_second": 22.395, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 9.097575757575759e-06, |
|
"loss": 0.6303, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 8.794545454545456e-06, |
|
"loss": 0.5999, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 8.491515151515152e-06, |
|
"loss": 0.6254, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 8.188484848484849e-06, |
|
"loss": 0.6222, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 3.3879506587982178, |
|
"eval_runtime": 1.8402, |
|
"eval_samples_per_second": 135.853, |
|
"eval_steps_per_second": 22.823, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 7.885454545454546e-06, |
|
"loss": 0.5877, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 7.582424242424243e-06, |
|
"loss": 0.5085, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 7.279393939393939e-06, |
|
"loss": 0.5189, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 6.976363636363637e-06, |
|
"loss": 0.5198, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 6.673939393939395e-06, |
|
"loss": 0.5228, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 3.485287666320801, |
|
"eval_runtime": 1.8718, |
|
"eval_samples_per_second": 133.559, |
|
"eval_steps_per_second": 22.438, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 6.371515151515152e-06, |
|
"loss": 0.4323, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 10.45, |
|
"learning_rate": 6.068484848484849e-06, |
|
"loss": 0.4348, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 5.7654545454545465e-06, |
|
"loss": 0.4376, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 5.4624242424242424e-06, |
|
"loss": 0.4441, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 3.6241962909698486, |
|
"eval_runtime": 1.8617, |
|
"eval_samples_per_second": 134.283, |
|
"eval_steps_per_second": 22.56, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 5.15939393939394e-06, |
|
"loss": 0.3995, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 11.36, |
|
"learning_rate": 4.856363636363637e-06, |
|
"loss": 0.3728, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 4.5533333333333335e-06, |
|
"loss": 0.3743, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 11.82, |
|
"learning_rate": 4.250303030303031e-06, |
|
"loss": 0.3787, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 3.684976100921631, |
|
"eval_runtime": 1.8613, |
|
"eval_samples_per_second": 134.316, |
|
"eval_steps_per_second": 22.565, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 3.947272727272727e-06, |
|
"loss": 0.374, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 3.645454545454546e-06, |
|
"loss": 0.3186, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 3.3424242424242424e-06, |
|
"loss": 0.3265, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"learning_rate": 3.03939393939394e-06, |
|
"loss": 0.3263, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"learning_rate": 2.7363636363636363e-06, |
|
"loss": 0.3312, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 3.783233880996704, |
|
"eval_runtime": 1.8573, |
|
"eval_samples_per_second": 134.603, |
|
"eval_steps_per_second": 22.613, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 2.4333333333333335e-06, |
|
"loss": 0.2967, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 2.130909090909091e-06, |
|
"loss": 0.2904, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"learning_rate": 1.827878787878788e-06, |
|
"loss": 0.293, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 13.86, |
|
"learning_rate": 1.5248484848484849e-06, |
|
"loss": 0.2893, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 3.7963521480560303, |
|
"eval_runtime": 1.8603, |
|
"eval_samples_per_second": 134.387, |
|
"eval_steps_per_second": 22.577, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"learning_rate": 1.221818181818182e-06, |
|
"loss": 0.2799, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 14.32, |
|
"learning_rate": 9.187878787878789e-07, |
|
"loss": 0.2723, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 6.163636363636364e-07, |
|
"loss": 0.2673, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"learning_rate": 3.1333333333333333e-07, |
|
"loss": 0.2642, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 1.0303030303030303e-08, |
|
"loss": 0.2671, |
|
"step": 33000 |
|
} |
|
], |
|
"max_steps": 33000, |
|
"num_train_epochs": 15, |
|
"total_flos": 1.1231089438777344e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|