BART_QA / trainer_state.json
student-shriman's picture
Upload 12 files
d7d0bc9
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"global_step": 33000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.23,
"learning_rate": 1.969939393939394e-05,
"loss": 2.4249,
"step": 500
},
{
"epoch": 0.45,
"learning_rate": 1.9396363636363637e-05,
"loss": 2.3742,
"step": 1000
},
{
"epoch": 0.68,
"learning_rate": 1.9093939393939395e-05,
"loss": 2.3454,
"step": 1500
},
{
"epoch": 0.91,
"learning_rate": 1.8790909090909093e-05,
"loss": 2.3292,
"step": 2000
},
{
"epoch": 1.0,
"eval_loss": 2.4535229206085205,
"eval_runtime": 1.8377,
"eval_samples_per_second": 136.041,
"eval_steps_per_second": 22.855,
"step": 2200
},
{
"epoch": 1.14,
"learning_rate": 1.848787878787879e-05,
"loss": 2.1317,
"step": 2500
},
{
"epoch": 1.36,
"learning_rate": 1.8184848484848487e-05,
"loss": 2.0183,
"step": 3000
},
{
"epoch": 1.59,
"learning_rate": 1.788181818181818e-05,
"loss": 2.0052,
"step": 3500
},
{
"epoch": 1.82,
"learning_rate": 1.757878787878788e-05,
"loss": 1.9885,
"step": 4000
},
{
"epoch": 2.0,
"eval_loss": 2.4263100624084473,
"eval_runtime": 1.8704,
"eval_samples_per_second": 133.662,
"eval_steps_per_second": 22.455,
"step": 4400
},
{
"epoch": 2.05,
"learning_rate": 1.727575757575758e-05,
"loss": 1.9505,
"step": 4500
},
{
"epoch": 2.27,
"learning_rate": 1.6972727272727273e-05,
"loss": 1.7025,
"step": 5000
},
{
"epoch": 2.5,
"learning_rate": 1.6669696969696972e-05,
"loss": 1.717,
"step": 5500
},
{
"epoch": 2.73,
"learning_rate": 1.636727272727273e-05,
"loss": 1.7028,
"step": 6000
},
{
"epoch": 2.95,
"learning_rate": 1.6064242424242428e-05,
"loss": 1.741,
"step": 6500
},
{
"epoch": 3.0,
"eval_loss": 2.508474111557007,
"eval_runtime": 1.8606,
"eval_samples_per_second": 134.366,
"eval_steps_per_second": 22.573,
"step": 6600
},
{
"epoch": 3.18,
"learning_rate": 1.5761212121212123e-05,
"loss": 1.4946,
"step": 7000
},
{
"epoch": 3.41,
"learning_rate": 1.5458181818181818e-05,
"loss": 1.4504,
"step": 7500
},
{
"epoch": 3.64,
"learning_rate": 1.5155151515151516e-05,
"loss": 1.4769,
"step": 8000
},
{
"epoch": 3.86,
"learning_rate": 1.4852121212121213e-05,
"loss": 1.4818,
"step": 8500
},
{
"epoch": 4.0,
"eval_loss": 2.5952579975128174,
"eval_runtime": 1.911,
"eval_samples_per_second": 130.825,
"eval_steps_per_second": 21.979,
"step": 8800
},
{
"epoch": 4.09,
"learning_rate": 1.4549090909090911e-05,
"loss": 1.3898,
"step": 9000
},
{
"epoch": 4.32,
"learning_rate": 1.4246666666666669e-05,
"loss": 1.218,
"step": 9500
},
{
"epoch": 4.55,
"learning_rate": 1.3943636363636365e-05,
"loss": 1.2481,
"step": 10000
},
{
"epoch": 4.77,
"learning_rate": 1.364060606060606e-05,
"loss": 1.2518,
"step": 10500
},
{
"epoch": 5.0,
"learning_rate": 1.3337575757575759e-05,
"loss": 1.2692,
"step": 11000
},
{
"epoch": 5.0,
"eval_loss": 2.763364791870117,
"eval_runtime": 1.8787,
"eval_samples_per_second": 133.072,
"eval_steps_per_second": 22.356,
"step": 11000
},
{
"epoch": 5.23,
"learning_rate": 1.3035151515151516e-05,
"loss": 1.0289,
"step": 11500
},
{
"epoch": 5.45,
"learning_rate": 1.2732727272727275e-05,
"loss": 1.0458,
"step": 12000
},
{
"epoch": 5.68,
"learning_rate": 1.2429696969696972e-05,
"loss": 1.0494,
"step": 12500
},
{
"epoch": 5.91,
"learning_rate": 1.2126666666666667e-05,
"loss": 1.057,
"step": 13000
},
{
"epoch": 6.0,
"eval_loss": 2.8617300987243652,
"eval_runtime": 1.8519,
"eval_samples_per_second": 135.0,
"eval_steps_per_second": 22.68,
"step": 13200
},
{
"epoch": 6.14,
"learning_rate": 1.1823636363636364e-05,
"loss": 0.9342,
"step": 13500
},
{
"epoch": 6.36,
"learning_rate": 1.152060606060606e-05,
"loss": 0.8533,
"step": 14000
},
{
"epoch": 6.59,
"learning_rate": 1.1217575757575759e-05,
"loss": 0.8949,
"step": 14500
},
{
"epoch": 6.82,
"learning_rate": 1.0914545454545456e-05,
"loss": 0.8928,
"step": 15000
},
{
"epoch": 7.0,
"eval_loss": 3.067075729370117,
"eval_runtime": 1.8518,
"eval_samples_per_second": 135.007,
"eval_steps_per_second": 22.681,
"step": 15400
},
{
"epoch": 7.05,
"learning_rate": 1.0611515151515152e-05,
"loss": 0.8587,
"step": 15500
},
{
"epoch": 7.27,
"learning_rate": 1.030848484848485e-05,
"loss": 0.7187,
"step": 16000
},
{
"epoch": 7.5,
"learning_rate": 1.0006060606060606e-05,
"loss": 0.7212,
"step": 16500
},
{
"epoch": 7.73,
"learning_rate": 9.703030303030305e-06,
"loss": 0.7395,
"step": 17000
},
{
"epoch": 7.95,
"learning_rate": 9.4e-06,
"loss": 0.758,
"step": 17500
},
{
"epoch": 8.0,
"eval_loss": 3.21189546585083,
"eval_runtime": 1.8755,
"eval_samples_per_second": 133.301,
"eval_steps_per_second": 22.395,
"step": 17600
},
{
"epoch": 8.18,
"learning_rate": 9.097575757575759e-06,
"loss": 0.6303,
"step": 18000
},
{
"epoch": 8.41,
"learning_rate": 8.794545454545456e-06,
"loss": 0.5999,
"step": 18500
},
{
"epoch": 8.64,
"learning_rate": 8.491515151515152e-06,
"loss": 0.6254,
"step": 19000
},
{
"epoch": 8.86,
"learning_rate": 8.188484848484849e-06,
"loss": 0.6222,
"step": 19500
},
{
"epoch": 9.0,
"eval_loss": 3.3879506587982178,
"eval_runtime": 1.8402,
"eval_samples_per_second": 135.853,
"eval_steps_per_second": 22.823,
"step": 19800
},
{
"epoch": 9.09,
"learning_rate": 7.885454545454546e-06,
"loss": 0.5877,
"step": 20000
},
{
"epoch": 9.32,
"learning_rate": 7.582424242424243e-06,
"loss": 0.5085,
"step": 20500
},
{
"epoch": 9.55,
"learning_rate": 7.279393939393939e-06,
"loss": 0.5189,
"step": 21000
},
{
"epoch": 9.77,
"learning_rate": 6.976363636363637e-06,
"loss": 0.5198,
"step": 21500
},
{
"epoch": 10.0,
"learning_rate": 6.673939393939395e-06,
"loss": 0.5228,
"step": 22000
},
{
"epoch": 10.0,
"eval_loss": 3.485287666320801,
"eval_runtime": 1.8718,
"eval_samples_per_second": 133.559,
"eval_steps_per_second": 22.438,
"step": 22000
},
{
"epoch": 10.23,
"learning_rate": 6.371515151515152e-06,
"loss": 0.4323,
"step": 22500
},
{
"epoch": 10.45,
"learning_rate": 6.068484848484849e-06,
"loss": 0.4348,
"step": 23000
},
{
"epoch": 10.68,
"learning_rate": 5.7654545454545465e-06,
"loss": 0.4376,
"step": 23500
},
{
"epoch": 10.91,
"learning_rate": 5.4624242424242424e-06,
"loss": 0.4441,
"step": 24000
},
{
"epoch": 11.0,
"eval_loss": 3.6241962909698486,
"eval_runtime": 1.8617,
"eval_samples_per_second": 134.283,
"eval_steps_per_second": 22.56,
"step": 24200
},
{
"epoch": 11.14,
"learning_rate": 5.15939393939394e-06,
"loss": 0.3995,
"step": 24500
},
{
"epoch": 11.36,
"learning_rate": 4.856363636363637e-06,
"loss": 0.3728,
"step": 25000
},
{
"epoch": 11.59,
"learning_rate": 4.5533333333333335e-06,
"loss": 0.3743,
"step": 25500
},
{
"epoch": 11.82,
"learning_rate": 4.250303030303031e-06,
"loss": 0.3787,
"step": 26000
},
{
"epoch": 12.0,
"eval_loss": 3.684976100921631,
"eval_runtime": 1.8613,
"eval_samples_per_second": 134.316,
"eval_steps_per_second": 22.565,
"step": 26400
},
{
"epoch": 12.05,
"learning_rate": 3.947272727272727e-06,
"loss": 0.374,
"step": 26500
},
{
"epoch": 12.27,
"learning_rate": 3.645454545454546e-06,
"loss": 0.3186,
"step": 27000
},
{
"epoch": 12.5,
"learning_rate": 3.3424242424242424e-06,
"loss": 0.3265,
"step": 27500
},
{
"epoch": 12.73,
"learning_rate": 3.03939393939394e-06,
"loss": 0.3263,
"step": 28000
},
{
"epoch": 12.95,
"learning_rate": 2.7363636363636363e-06,
"loss": 0.3312,
"step": 28500
},
{
"epoch": 13.0,
"eval_loss": 3.783233880996704,
"eval_runtime": 1.8573,
"eval_samples_per_second": 134.603,
"eval_steps_per_second": 22.613,
"step": 28600
},
{
"epoch": 13.18,
"learning_rate": 2.4333333333333335e-06,
"loss": 0.2967,
"step": 29000
},
{
"epoch": 13.41,
"learning_rate": 2.130909090909091e-06,
"loss": 0.2904,
"step": 29500
},
{
"epoch": 13.64,
"learning_rate": 1.827878787878788e-06,
"loss": 0.293,
"step": 30000
},
{
"epoch": 13.86,
"learning_rate": 1.5248484848484849e-06,
"loss": 0.2893,
"step": 30500
},
{
"epoch": 14.0,
"eval_loss": 3.7963521480560303,
"eval_runtime": 1.8603,
"eval_samples_per_second": 134.387,
"eval_steps_per_second": 22.577,
"step": 30800
},
{
"epoch": 14.09,
"learning_rate": 1.221818181818182e-06,
"loss": 0.2799,
"step": 31000
},
{
"epoch": 14.32,
"learning_rate": 9.187878787878789e-07,
"loss": 0.2723,
"step": 31500
},
{
"epoch": 14.55,
"learning_rate": 6.163636363636364e-07,
"loss": 0.2673,
"step": 32000
},
{
"epoch": 14.77,
"learning_rate": 3.1333333333333333e-07,
"loss": 0.2642,
"step": 32500
},
{
"epoch": 15.0,
"learning_rate": 1.0303030303030303e-08,
"loss": 0.2671,
"step": 33000
}
],
"max_steps": 33000,
"num_train_epochs": 15,
"total_flos": 1.1231089438777344e+17,
"trial_name": null,
"trial_params": null
}