|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 10, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3e-05, |
|
"loss": 2.0841, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.07586472641552201, |
|
"eval_loss": 2.025390625, |
|
"eval_runtime": 1.7457, |
|
"eval_samples_per_second": 32.651, |
|
"eval_steps_per_second": 1.146, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3e-05, |
|
"loss": 2.062, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.07586472641552201, |
|
"eval_loss": 2.025390625, |
|
"eval_runtime": 2.5631, |
|
"eval_samples_per_second": 22.239, |
|
"eval_steps_per_second": 0.78, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.9265847744427305e-05, |
|
"loss": 2.1509, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.07613672423953942, |
|
"eval_loss": 1.994140625, |
|
"eval_runtime": 1.9455, |
|
"eval_samples_per_second": 29.298, |
|
"eval_steps_per_second": 1.028, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.7135254915624213e-05, |
|
"loss": 2.1206, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.07563806156217417, |
|
"eval_loss": 1.994140625, |
|
"eval_runtime": 2.5486, |
|
"eval_samples_per_second": 22.365, |
|
"eval_steps_per_second": 0.785, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.3816778784387097e-05, |
|
"loss": 2.2087, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.07568339453284374, |
|
"eval_loss": 1.994140625, |
|
"eval_runtime": 2.1509, |
|
"eval_samples_per_second": 26.501, |
|
"eval_steps_per_second": 0.93, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.963525491562421e-05, |
|
"loss": 2.0337, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.07550206265016547, |
|
"eval_loss": 1.990234375, |
|
"eval_runtime": 1.7684, |
|
"eval_samples_per_second": 32.232, |
|
"eval_steps_per_second": 1.131, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5e-05, |
|
"loss": 2.026, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.07552472913550025, |
|
"eval_loss": 1.9853515625, |
|
"eval_runtime": 1.9602, |
|
"eval_samples_per_second": 29.079, |
|
"eval_steps_per_second": 1.02, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.036474508437579e-05, |
|
"loss": 2.1879, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.07557006210616982, |
|
"eval_loss": 1.9833984375, |
|
"eval_runtime": 1.7437, |
|
"eval_samples_per_second": 32.69, |
|
"eval_steps_per_second": 1.147, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.1832212156129045e-06, |
|
"loss": 2.1052, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.07538873022349155, |
|
"eval_loss": 1.982421875, |
|
"eval_runtime": 2.3577, |
|
"eval_samples_per_second": 24.177, |
|
"eval_steps_per_second": 0.848, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.86474508437579e-06, |
|
"loss": 2.046, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.07541139670882632, |
|
"eval_loss": 1.98046875, |
|
"eval_runtime": 2.5554, |
|
"eval_samples_per_second": 22.306, |
|
"eval_steps_per_second": 0.783, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 10, |
|
"total_flos": 945593647104.0, |
|
"train_loss": 2.1025146484375, |
|
"train_runtime": 54.0138, |
|
"train_samples_per_second": 5.61, |
|
"train_steps_per_second": 0.185 |
|
} |
|
], |
|
"max_steps": 10, |
|
"num_train_epochs": 1, |
|
"total_flos": 945593647104.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|