gpt2-vnspinads-contents / trainer_state.json
sonnv's picture
push model to huggingface
a176403
{
"best_metric": 0.9393565058708191,
"best_model_checkpoint": "./checkpoint-220",
"epoch": 5.930232558139535,
"global_step": 220,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.27,
"learning_rate": 5e-05,
"loss": 6.7937,
"step": 10
},
{
"epoch": 0.27,
"eval_loss": 6.501975059509277,
"eval_runtime": 33.9782,
"eval_samples_per_second": 59.685,
"eval_steps_per_second": 1.884,
"step": 10
},
{
"epoch": 0.53,
"learning_rate": 0.0001,
"loss": 5.985,
"step": 20
},
{
"epoch": 0.53,
"eval_loss": 5.326801776885986,
"eval_runtime": 18.802,
"eval_samples_per_second": 107.861,
"eval_steps_per_second": 3.404,
"step": 20
},
{
"epoch": 0.8,
"learning_rate": 0.00015,
"loss": 4.6798,
"step": 30
},
{
"epoch": 0.8,
"eval_loss": 3.829317331314087,
"eval_runtime": 18.7742,
"eval_samples_per_second": 108.021,
"eval_steps_per_second": 3.409,
"step": 30
},
{
"epoch": 1.08,
"learning_rate": 0.0002,
"loss": 3.0462,
"step": 40
},
{
"epoch": 1.08,
"eval_loss": 1.9157757759094238,
"eval_runtime": 18.7907,
"eval_samples_per_second": 107.926,
"eval_steps_per_second": 3.406,
"step": 40
},
{
"epoch": 1.35,
"learning_rate": 0.00025,
"loss": 1.5197,
"step": 50
},
{
"epoch": 1.35,
"eval_loss": 1.2286747694015503,
"eval_runtime": 18.812,
"eval_samples_per_second": 107.804,
"eval_steps_per_second": 3.402,
"step": 50
},
{
"epoch": 1.61,
"learning_rate": 0.0003,
"loss": 1.1349,
"step": 60
},
{
"epoch": 1.61,
"eval_loss": 1.1046648025512695,
"eval_runtime": 18.8413,
"eval_samples_per_second": 107.636,
"eval_steps_per_second": 3.397,
"step": 60
},
{
"epoch": 1.88,
"learning_rate": 0.00035,
"loss": 1.0718,
"step": 70
},
{
"epoch": 1.88,
"eval_loss": 1.0654218196868896,
"eval_runtime": 18.8231,
"eval_samples_per_second": 107.74,
"eval_steps_per_second": 3.4,
"step": 70
},
{
"epoch": 2.16,
"learning_rate": 0.0004,
"loss": 1.0987,
"step": 80
},
{
"epoch": 2.16,
"eval_loss": 1.0404733419418335,
"eval_runtime": 18.8082,
"eval_samples_per_second": 107.825,
"eval_steps_per_second": 3.403,
"step": 80
},
{
"epoch": 2.43,
"learning_rate": 0.00045000000000000004,
"loss": 1.0133,
"step": 90
},
{
"epoch": 2.43,
"eval_loss": 1.0208371877670288,
"eval_runtime": 18.8192,
"eval_samples_per_second": 107.762,
"eval_steps_per_second": 3.401,
"step": 90
},
{
"epoch": 2.69,
"learning_rate": 0.0005,
"loss": 0.9869,
"step": 100
},
{
"epoch": 2.69,
"eval_loss": 1.0047191381454468,
"eval_runtime": 18.852,
"eval_samples_per_second": 107.575,
"eval_steps_per_second": 3.395,
"step": 100
},
{
"epoch": 2.96,
"learning_rate": 0.00045901639344262296,
"loss": 0.9809,
"step": 110
},
{
"epoch": 2.96,
"eval_loss": 0.9912181496620178,
"eval_runtime": 18.8516,
"eval_samples_per_second": 107.577,
"eval_steps_per_second": 3.395,
"step": 110
},
{
"epoch": 3.24,
"learning_rate": 0.0004180327868852459,
"loss": 1.0275,
"step": 120
},
{
"epoch": 3.24,
"eval_loss": 0.9803428649902344,
"eval_runtime": 18.8293,
"eval_samples_per_second": 107.705,
"eval_steps_per_second": 3.399,
"step": 120
},
{
"epoch": 3.5,
"learning_rate": 0.0003770491803278688,
"loss": 0.9608,
"step": 130
},
{
"epoch": 3.5,
"eval_loss": 0.9717100262641907,
"eval_runtime": 18.8516,
"eval_samples_per_second": 107.577,
"eval_steps_per_second": 3.395,
"step": 130
},
{
"epoch": 3.77,
"learning_rate": 0.0003360655737704918,
"loss": 0.9431,
"step": 140
},
{
"epoch": 3.77,
"eval_loss": 0.9643934965133667,
"eval_runtime": 18.8147,
"eval_samples_per_second": 107.788,
"eval_steps_per_second": 3.402,
"step": 140
},
{
"epoch": 4.05,
"learning_rate": 0.00029508196721311476,
"loss": 0.991,
"step": 150
},
{
"epoch": 4.05,
"eval_loss": 0.9581753015518188,
"eval_runtime": 18.8481,
"eval_samples_per_second": 107.597,
"eval_steps_per_second": 3.396,
"step": 150
},
{
"epoch": 4.32,
"learning_rate": 0.0002540983606557377,
"loss": 0.9387,
"step": 160
},
{
"epoch": 4.32,
"eval_loss": 0.9531411528587341,
"eval_runtime": 18.8022,
"eval_samples_per_second": 107.86,
"eval_steps_per_second": 3.404,
"step": 160
},
{
"epoch": 4.58,
"learning_rate": 0.00021311475409836064,
"loss": 0.9203,
"step": 170
},
{
"epoch": 4.58,
"eval_loss": 0.9489945769309998,
"eval_runtime": 18.7924,
"eval_samples_per_second": 107.916,
"eval_steps_per_second": 3.406,
"step": 170
},
{
"epoch": 4.85,
"learning_rate": 0.00017213114754098362,
"loss": 0.9235,
"step": 180
},
{
"epoch": 4.85,
"eval_loss": 0.9456363320350647,
"eval_runtime": 18.8084,
"eval_samples_per_second": 107.824,
"eval_steps_per_second": 3.403,
"step": 180
},
{
"epoch": 5.13,
"learning_rate": 0.00013114754098360657,
"loss": 0.9746,
"step": 190
},
{
"epoch": 5.13,
"eval_loss": 0.9429621696472168,
"eval_runtime": 18.8357,
"eval_samples_per_second": 107.668,
"eval_steps_per_second": 3.398,
"step": 190
},
{
"epoch": 5.4,
"learning_rate": 9.016393442622952e-05,
"loss": 0.9176,
"step": 200
},
{
"epoch": 5.4,
"eval_loss": 0.9410804510116577,
"eval_runtime": 18.8338,
"eval_samples_per_second": 107.679,
"eval_steps_per_second": 3.398,
"step": 200
},
{
"epoch": 5.66,
"learning_rate": 4.9180327868852456e-05,
"loss": 0.9175,
"step": 210
},
{
"epoch": 5.66,
"eval_loss": 0.9398788213729858,
"eval_runtime": 18.8076,
"eval_samples_per_second": 107.829,
"eval_steps_per_second": 3.403,
"step": 210
},
{
"epoch": 5.93,
"learning_rate": 8.19672131147541e-06,
"loss": 0.91,
"step": 220
},
{
"epoch": 5.93,
"eval_loss": 0.9393565058708191,
"eval_runtime": 18.7996,
"eval_samples_per_second": 107.874,
"eval_steps_per_second": 3.404,
"step": 220
}
],
"max_steps": 222,
"num_train_epochs": 6,
"total_flos": 8.954608582656e+16,
"trial_name": null,
"trial_params": null
}