pmc_vit-l-14_hf / trainer_state.json
ryanyip7777's picture
Upload 10 files
99fcf3a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"global_step": 41090,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12,
"learning_rate": 4.9391579459722564e-05,
"loss": 1.7982,
"step": 500
},
{
"epoch": 0.24,
"learning_rate": 4.878315891944512e-05,
"loss": 1.2914,
"step": 1000
},
{
"epoch": 0.37,
"learning_rate": 4.817473837916768e-05,
"loss": 1.1456,
"step": 1500
},
{
"epoch": 0.49,
"learning_rate": 4.756631783889024e-05,
"loss": 1.0345,
"step": 2000
},
{
"epoch": 0.61,
"learning_rate": 4.6957897298612804e-05,
"loss": 0.9665,
"step": 2500
},
{
"epoch": 0.73,
"learning_rate": 4.6349476758335365e-05,
"loss": 0.9272,
"step": 3000
},
{
"epoch": 0.85,
"learning_rate": 4.574105621805793e-05,
"loss": 0.8571,
"step": 3500
},
{
"epoch": 0.97,
"learning_rate": 4.513263567778048e-05,
"loss": 0.8139,
"step": 4000
},
{
"epoch": 1.1,
"learning_rate": 4.4524215137503043e-05,
"loss": 0.6986,
"step": 4500
},
{
"epoch": 1.22,
"learning_rate": 4.3915794597225605e-05,
"loss": 0.6375,
"step": 5000
},
{
"epoch": 1.34,
"learning_rate": 4.330737405694817e-05,
"loss": 0.6407,
"step": 5500
},
{
"epoch": 1.46,
"learning_rate": 4.269895351667073e-05,
"loss": 0.6139,
"step": 6000
},
{
"epoch": 1.58,
"learning_rate": 4.209053297639329e-05,
"loss": 0.5973,
"step": 6500
},
{
"epoch": 1.7,
"learning_rate": 4.1482112436115845e-05,
"loss": 0.6094,
"step": 7000
},
{
"epoch": 1.83,
"learning_rate": 4.0873691895838406e-05,
"loss": 0.5824,
"step": 7500
},
{
"epoch": 1.95,
"learning_rate": 4.026527135556097e-05,
"loss": 0.5459,
"step": 8000
},
{
"epoch": 2.07,
"learning_rate": 3.965685081528352e-05,
"loss": 0.4764,
"step": 8500
},
{
"epoch": 2.19,
"learning_rate": 3.9048430275006084e-05,
"loss": 0.427,
"step": 9000
},
{
"epoch": 2.31,
"learning_rate": 3.8440009734728646e-05,
"loss": 0.4192,
"step": 9500
},
{
"epoch": 2.43,
"learning_rate": 3.783158919445121e-05,
"loss": 0.4278,
"step": 10000
},
{
"epoch": 2.56,
"learning_rate": 3.722316865417377e-05,
"loss": 0.4157,
"step": 10500
},
{
"epoch": 2.68,
"learning_rate": 3.6614748113896324e-05,
"loss": 0.4156,
"step": 11000
},
{
"epoch": 2.8,
"learning_rate": 3.6006327573618886e-05,
"loss": 0.4137,
"step": 11500
},
{
"epoch": 2.92,
"learning_rate": 3.539790703334145e-05,
"loss": 0.3731,
"step": 12000
},
{
"epoch": 3.04,
"learning_rate": 3.4789486493064e-05,
"loss": 0.3516,
"step": 12500
},
{
"epoch": 3.16,
"learning_rate": 3.418106595278657e-05,
"loss": 0.3098,
"step": 13000
},
{
"epoch": 3.29,
"learning_rate": 3.357264541250913e-05,
"loss": 0.3051,
"step": 13500
},
{
"epoch": 3.41,
"learning_rate": 3.296422487223169e-05,
"loss": 0.3029,
"step": 14000
},
{
"epoch": 3.53,
"learning_rate": 3.235580433195425e-05,
"loss": 0.3107,
"step": 14500
},
{
"epoch": 3.65,
"learning_rate": 3.174738379167681e-05,
"loss": 0.2969,
"step": 15000
},
{
"epoch": 3.77,
"learning_rate": 3.1138963251399365e-05,
"loss": 0.2886,
"step": 15500
},
{
"epoch": 3.89,
"learning_rate": 3.053054271112193e-05,
"loss": 0.2887,
"step": 16000
},
{
"epoch": 4.02,
"learning_rate": 2.9922122170844492e-05,
"loss": 0.2803,
"step": 16500
},
{
"epoch": 4.14,
"learning_rate": 2.9313701630567047e-05,
"loss": 0.2256,
"step": 17000
},
{
"epoch": 4.26,
"learning_rate": 2.8705281090289608e-05,
"loss": 0.2209,
"step": 17500
},
{
"epoch": 4.38,
"learning_rate": 2.8096860550012173e-05,
"loss": 0.2197,
"step": 18000
},
{
"epoch": 4.5,
"learning_rate": 2.7488440009734728e-05,
"loss": 0.2305,
"step": 18500
},
{
"epoch": 4.62,
"learning_rate": 2.688001946945729e-05,
"loss": 0.2252,
"step": 19000
},
{
"epoch": 4.75,
"learning_rate": 2.627159892917985e-05,
"loss": 0.207,
"step": 19500
},
{
"epoch": 4.87,
"learning_rate": 2.566317838890241e-05,
"loss": 0.2187,
"step": 20000
},
{
"epoch": 4.99,
"learning_rate": 2.505475784862497e-05,
"loss": 0.207,
"step": 20500
},
{
"epoch": 5.11,
"learning_rate": 2.444633730834753e-05,
"loss": 0.1685,
"step": 21000
},
{
"epoch": 5.23,
"learning_rate": 2.383791676807009e-05,
"loss": 0.1772,
"step": 21500
},
{
"epoch": 5.35,
"learning_rate": 2.3229496227792653e-05,
"loss": 0.1728,
"step": 22000
},
{
"epoch": 5.48,
"learning_rate": 2.262107568751521e-05,
"loss": 0.1678,
"step": 22500
},
{
"epoch": 5.6,
"learning_rate": 2.201265514723777e-05,
"loss": 0.1642,
"step": 23000
},
{
"epoch": 5.72,
"learning_rate": 2.1404234606960334e-05,
"loss": 0.1664,
"step": 23500
},
{
"epoch": 5.84,
"learning_rate": 2.0795814066682892e-05,
"loss": 0.1575,
"step": 24000
},
{
"epoch": 5.96,
"learning_rate": 2.018739352640545e-05,
"loss": 0.1599,
"step": 24500
},
{
"epoch": 6.08,
"learning_rate": 1.9578972986128012e-05,
"loss": 0.1249,
"step": 25000
},
{
"epoch": 6.21,
"learning_rate": 1.8970552445850574e-05,
"loss": 0.1266,
"step": 25500
},
{
"epoch": 6.33,
"learning_rate": 1.8362131905573132e-05,
"loss": 0.1304,
"step": 26000
},
{
"epoch": 6.45,
"learning_rate": 1.7753711365295694e-05,
"loss": 0.1161,
"step": 26500
},
{
"epoch": 6.57,
"learning_rate": 1.7145290825018255e-05,
"loss": 0.1253,
"step": 27000
},
{
"epoch": 6.69,
"learning_rate": 1.6536870284740814e-05,
"loss": 0.1215,
"step": 27500
},
{
"epoch": 6.81,
"learning_rate": 1.5928449744463375e-05,
"loss": 0.1158,
"step": 28000
},
{
"epoch": 6.94,
"learning_rate": 1.5320029204185933e-05,
"loss": 0.1219,
"step": 28500
},
{
"epoch": 7.06,
"learning_rate": 1.4711608663908493e-05,
"loss": 0.1039,
"step": 29000
},
{
"epoch": 7.18,
"learning_rate": 1.4103188123631053e-05,
"loss": 0.081,
"step": 29500
},
{
"epoch": 7.3,
"learning_rate": 1.3494767583353615e-05,
"loss": 0.0922,
"step": 30000
},
{
"epoch": 7.42,
"learning_rate": 1.2886347043076175e-05,
"loss": 0.0844,
"step": 30500
},
{
"epoch": 7.54,
"learning_rate": 1.2277926502798735e-05,
"loss": 0.0759,
"step": 31000
},
{
"epoch": 7.67,
"learning_rate": 1.1669505962521295e-05,
"loss": 0.0882,
"step": 31500
},
{
"epoch": 7.79,
"learning_rate": 1.1061085422243855e-05,
"loss": 0.0856,
"step": 32000
},
{
"epoch": 7.91,
"learning_rate": 1.0452664881966416e-05,
"loss": 0.0881,
"step": 32500
},
{
"epoch": 8.03,
"learning_rate": 9.844244341688976e-06,
"loss": 0.0708,
"step": 33000
},
{
"epoch": 8.15,
"learning_rate": 9.235823801411536e-06,
"loss": 0.0538,
"step": 33500
},
{
"epoch": 8.27,
"learning_rate": 8.627403261134098e-06,
"loss": 0.0554,
"step": 34000
},
{
"epoch": 8.4,
"learning_rate": 8.018982720856656e-06,
"loss": 0.06,
"step": 34500
},
{
"epoch": 8.52,
"learning_rate": 7.410562180579217e-06,
"loss": 0.0514,
"step": 35000
},
{
"epoch": 8.64,
"learning_rate": 6.802141640301777e-06,
"loss": 0.0536,
"step": 35500
},
{
"epoch": 8.76,
"learning_rate": 6.193721100024337e-06,
"loss": 0.0583,
"step": 36000
},
{
"epoch": 8.88,
"learning_rate": 5.585300559746897e-06,
"loss": 0.052,
"step": 36500
},
{
"epoch": 9.0,
"learning_rate": 4.976880019469458e-06,
"loss": 0.0481,
"step": 37000
},
{
"epoch": 9.13,
"learning_rate": 4.368459479192018e-06,
"loss": 0.0378,
"step": 37500
},
{
"epoch": 9.25,
"learning_rate": 3.760038938914578e-06,
"loss": 0.0296,
"step": 38000
},
{
"epoch": 9.37,
"learning_rate": 3.151618398637138e-06,
"loss": 0.037,
"step": 38500
},
{
"epoch": 9.49,
"learning_rate": 2.543197858359698e-06,
"loss": 0.0339,
"step": 39000
},
{
"epoch": 9.61,
"learning_rate": 1.9347773180822585e-06,
"loss": 0.0369,
"step": 39500
},
{
"epoch": 9.73,
"learning_rate": 1.3263567778048189e-06,
"loss": 0.0344,
"step": 40000
},
{
"epoch": 9.86,
"learning_rate": 7.17936237527379e-07,
"loss": 0.0301,
"step": 40500
},
{
"epoch": 9.98,
"learning_rate": 1.0951569724993917e-07,
"loss": 0.0325,
"step": 41000
},
{
"epoch": 10.0,
"step": 41090,
"total_flos": 1.1825796643443088e+17,
"train_loss": 0.30929526851019973,
"train_runtime": 55900.09,
"train_samples_per_second": 11.761,
"train_steps_per_second": 0.735
}
],
"max_steps": 41090,
"num_train_epochs": 10,
"total_flos": 1.1825796643443088e+17,
"trial_name": null,
"trial_params": null
}