|
{ |
|
"best_metric": 0.01884845644235611, |
|
"best_model_checkpoint": "./results/checkpoint-4500", |
|
"epoch": 0.9439899307740718, |
|
"global_step": 4500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9300748199426614e-05, |
|
"loss": 0.0391, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.990558615263572, |
|
"eval_loss": 0.04349859058856964, |
|
"eval_runtime": 154.2476, |
|
"eval_samples_per_second": 123.6, |
|
"eval_steps_per_second": 7.728, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.860149639885323e-05, |
|
"loss": 0.0327, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.9903488067138736, |
|
"eval_loss": 0.04805811867117882, |
|
"eval_runtime": 154.05, |
|
"eval_samples_per_second": 123.759, |
|
"eval_steps_per_second": 7.738, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.790224459827984e-05, |
|
"loss": 0.0266, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.9911355887752425, |
|
"eval_loss": 0.0459710918366909, |
|
"eval_runtime": 154.4944, |
|
"eval_samples_per_second": 123.403, |
|
"eval_steps_per_second": 7.715, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.7202992797706454e-05, |
|
"loss": 0.035, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.9916601101494886, |
|
"eval_loss": 0.03351669758558273, |
|
"eval_runtime": 154.0214, |
|
"eval_samples_per_second": 123.782, |
|
"eval_steps_per_second": 7.739, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.650374099713307e-05, |
|
"loss": 0.0371, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.9930763178599528, |
|
"eval_loss": 0.025116313248872757, |
|
"eval_runtime": 153.2057, |
|
"eval_samples_per_second": 124.441, |
|
"eval_steps_per_second": 7.78, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.580448919655968e-05, |
|
"loss": 0.026, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.9938630999213218, |
|
"eval_loss": 0.027913494035601616, |
|
"eval_runtime": 153.7631, |
|
"eval_samples_per_second": 123.989, |
|
"eval_steps_per_second": 7.752, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5105237395986297e-05, |
|
"loss": 0.0299, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.9934959349593496, |
|
"eval_loss": 0.027549268677830696, |
|
"eval_runtime": 153.8917, |
|
"eval_samples_per_second": 123.886, |
|
"eval_steps_per_second": 7.746, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.4405985595412911e-05, |
|
"loss": 0.0254, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.9945449777078416, |
|
"eval_loss": 0.024684084579348564, |
|
"eval_runtime": 153.4415, |
|
"eval_samples_per_second": 124.249, |
|
"eval_steps_per_second": 7.768, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3706733794839521e-05, |
|
"loss": 0.0219, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.9954366640440598, |
|
"eval_loss": 0.01884845644235611, |
|
"eval_runtime": 153.901, |
|
"eval_samples_per_second": 123.878, |
|
"eval_steps_per_second": 7.745, |
|
"step": 4500 |
|
} |
|
], |
|
"max_steps": 14301, |
|
"num_train_epochs": 3, |
|
"total_flos": 9537652703232000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|