|
{ |
|
"best_metric": 0.9842931937172775, |
|
"best_model_checkpoint": "model/checkpoint-94", |
|
"epoch": 7.703703703703704, |
|
"eval_steps": 500, |
|
"global_step": 104, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00036363636363636367, |
|
"loss": 1.0999, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.643979057591623, |
|
"eval_loss": 0.7986512780189514, |
|
"eval_runtime": 28.581, |
|
"eval_samples_per_second": 6.683, |
|
"eval_steps_per_second": 0.21, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0004731182795698925, |
|
"loss": 1.0029, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00043010752688172043, |
|
"loss": 0.6342, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9424083769633508, |
|
"eval_loss": 0.24137574434280396, |
|
"eval_runtime": 26.3088, |
|
"eval_samples_per_second": 7.26, |
|
"eval_steps_per_second": 0.228, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.0003870967741935484, |
|
"loss": 0.5732, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.0003440860215053764, |
|
"loss": 0.4882, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.9633507853403142, |
|
"eval_loss": 0.16461053490638733, |
|
"eval_runtime": 26.318, |
|
"eval_samples_per_second": 7.257, |
|
"eval_steps_per_second": 0.228, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.0003010752688172043, |
|
"loss": 0.463, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9424083769633508, |
|
"eval_loss": 0.25284695625305176, |
|
"eval_runtime": 26.2832, |
|
"eval_samples_per_second": 7.267, |
|
"eval_steps_per_second": 0.228, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.00025806451612903227, |
|
"loss": 0.4286, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 0.00021505376344086021, |
|
"loss": 0.4609, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_accuracy": 0.9790575916230366, |
|
"eval_loss": 0.11301162838935852, |
|
"eval_runtime": 26.3727, |
|
"eval_samples_per_second": 7.242, |
|
"eval_steps_per_second": 0.228, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.0001720430107526882, |
|
"loss": 0.4636, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 0.00012903225806451613, |
|
"loss": 0.4251, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9633507853403142, |
|
"eval_loss": 0.13039104640483856, |
|
"eval_runtime": 26.3002, |
|
"eval_samples_per_second": 7.262, |
|
"eval_steps_per_second": 0.228, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 8.60215053763441e-05, |
|
"loss": 0.3802, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"eval_accuracy": 0.9842931937172775, |
|
"eval_loss": 0.07386188954114914, |
|
"eval_runtime": 26.267, |
|
"eval_samples_per_second": 7.271, |
|
"eval_steps_per_second": 0.228, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 4.301075268817205e-05, |
|
"loss": 0.378, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 0.0, |
|
"loss": 0.4147, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"eval_accuracy": 0.9842931937172775, |
|
"eval_loss": 0.06754852086305618, |
|
"eval_runtime": 26.3749, |
|
"eval_samples_per_second": 7.242, |
|
"eval_steps_per_second": 0.227, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"step": 104, |
|
"total_flos": 1.0251773186064077e+18, |
|
"train_loss": 0.5548124955250666, |
|
"train_runtime": 4572.4853, |
|
"train_samples_per_second": 3.004, |
|
"train_steps_per_second": 0.023 |
|
} |
|
], |
|
"logging_steps": 8, |
|
"max_steps": 104, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"total_flos": 1.0251773186064077e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|