|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.901960784313726, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.673202614379085e-05, |
|
"loss": 0.0803, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.9874985395490128, |
|
"eval_loss": 0.03710582107305527, |
|
"eval_runtime": 80.6999, |
|
"eval_samples_per_second": 106.06, |
|
"eval_steps_per_second": 2.528, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.3464052287581704e-05, |
|
"loss": 0.0393, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.9884332281808622, |
|
"eval_loss": 0.03642109036445618, |
|
"eval_runtime": 80.6008, |
|
"eval_samples_per_second": 106.19, |
|
"eval_steps_per_second": 2.531, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.0196078431372555e-05, |
|
"loss": 0.0372, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.986447014838182, |
|
"eval_loss": 0.04179869592189789, |
|
"eval_runtime": 80.6457, |
|
"eval_samples_per_second": 106.131, |
|
"eval_steps_per_second": 2.53, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.6928104575163405e-05, |
|
"loss": 0.0114, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_accuracy": 0.9896015889706742, |
|
"eval_loss": 0.0394107885658741, |
|
"eval_runtime": 80.6065, |
|
"eval_samples_per_second": 106.182, |
|
"eval_steps_per_second": 2.531, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.366013071895425e-05, |
|
"loss": 0.0138, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.9897184250496553, |
|
"eval_loss": 0.0397811159491539, |
|
"eval_runtime": 80.5677, |
|
"eval_samples_per_second": 106.234, |
|
"eval_steps_per_second": 2.532, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.0392156862745097e-05, |
|
"loss": 0.0127, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_accuracy": 0.9908867858394672, |
|
"eval_loss": 0.034337081015110016, |
|
"eval_runtime": 80.5302, |
|
"eval_samples_per_second": 106.283, |
|
"eval_steps_per_second": 2.533, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.7124183006535947e-05, |
|
"loss": 0.0052, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_accuracy": 0.9911204579974296, |
|
"eval_loss": 0.037403274327516556, |
|
"eval_runtime": 80.6258, |
|
"eval_samples_per_second": 106.157, |
|
"eval_steps_per_second": 2.53, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.38562091503268e-05, |
|
"loss": 0.0018, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_accuracy": 0.9906531136815049, |
|
"eval_loss": 0.04766124114394188, |
|
"eval_runtime": 80.6271, |
|
"eval_samples_per_second": 106.155, |
|
"eval_steps_per_second": 2.53, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 0.0009, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_accuracy": 0.9912372940764108, |
|
"eval_loss": 0.0513538159430027, |
|
"eval_runtime": 80.6405, |
|
"eval_samples_per_second": 106.138, |
|
"eval_steps_per_second": 2.53, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 1.7320261437908496e-05, |
|
"loss": 0.001, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"eval_accuracy": 0.9904194415235424, |
|
"eval_loss": 0.046223659068346024, |
|
"eval_runtime": 80.5163, |
|
"eval_samples_per_second": 106.301, |
|
"eval_steps_per_second": 2.534, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 1.4052287581699347e-05, |
|
"loss": 0.0002, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_accuracy": 0.9911204579974296, |
|
"eval_loss": 0.04658184573054314, |
|
"eval_runtime": 80.6281, |
|
"eval_samples_per_second": 106.154, |
|
"eval_steps_per_second": 2.53, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1.0784313725490197e-05, |
|
"loss": 0.0006, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"eval_accuracy": 0.9905362776025236, |
|
"eval_loss": 0.046631619334220886, |
|
"eval_runtime": 80.5126, |
|
"eval_samples_per_second": 106.306, |
|
"eval_steps_per_second": 2.534, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 7.5163398692810456e-06, |
|
"loss": 0.0001, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"eval_accuracy": 0.990769949760486, |
|
"eval_loss": 0.05143677070736885, |
|
"eval_runtime": 80.6039, |
|
"eval_samples_per_second": 106.186, |
|
"eval_steps_per_second": 2.531, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 4.2483660130718954e-06, |
|
"loss": 0.0005, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_accuracy": 0.9908867858394672, |
|
"eval_loss": 0.04908544197678566, |
|
"eval_runtime": 80.4782, |
|
"eval_samples_per_second": 106.352, |
|
"eval_steps_per_second": 2.535, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 9.80392156862745e-07, |
|
"loss": 0.0004, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.9910036219184484, |
|
"eval_loss": 0.049937766045331955, |
|
"eval_runtime": 80.6026, |
|
"eval_samples_per_second": 106.188, |
|
"eval_steps_per_second": 2.531, |
|
"step": 3000 |
|
} |
|
], |
|
"max_steps": 3060, |
|
"num_train_epochs": 5, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|