llama-160m-sst2 / trainer_state.json
Cheng98's picture
End of training
8589bcc
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.998099762470309,
"global_step": 2104,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.95,
"learning_rate": 3.880703422053232e-05,
"loss": 0.7156,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.5905963302752294,
"eval_loss": 0.6729157567024231,
"eval_runtime": 2.6255,
"eval_samples_per_second": 332.129,
"eval_steps_per_second": 10.665,
"step": 526
},
{
"epoch": 1.9,
"learning_rate": 2.692490494296578e-05,
"loss": 0.6745,
"step": 1000
},
{
"epoch": 2.0,
"eval_accuracy": 0.6387614678899083,
"eval_loss": 0.6506585478782654,
"eval_runtime": 2.6291,
"eval_samples_per_second": 331.667,
"eval_steps_per_second": 10.65,
"step": 1052
},
{
"epoch": 2.85,
"learning_rate": 1.5042775665399238e-05,
"loss": 0.6574,
"step": 1500
},
{
"epoch": 3.0,
"eval_accuracy": 0.6456422018348624,
"eval_loss": 0.6401416659355164,
"eval_runtime": 2.6431,
"eval_samples_per_second": 329.91,
"eval_steps_per_second": 10.593,
"step": 1578
},
{
"epoch": 3.8,
"learning_rate": 3.1606463878326996e-06,
"loss": 0.6507,
"step": 2000
},
{
"epoch": 4.0,
"eval_accuracy": 0.6444954128440367,
"eval_loss": 0.6368556618690491,
"eval_runtime": 2.6019,
"eval_samples_per_second": 335.136,
"eval_steps_per_second": 10.761,
"step": 2104
},
{
"epoch": 4.0,
"step": 2104,
"total_flos": 9.369725771789107e+16,
"train_loss": 0.6729737561011949,
"train_runtime": 2059.9591,
"train_samples_per_second": 130.777,
"train_steps_per_second": 1.021
}
],
"max_steps": 2104,
"num_train_epochs": 4,
"total_flos": 9.369725771789107e+16,
"trial_name": null,
"trial_params": null
}