|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.8226342885780673, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00015, |
|
"loss": 2.8412, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.5813477039337158, |
|
"eval_phone_accuracy": 0.5792233379941528, |
|
"eval_runtime": 11.4324, |
|
"eval_samples_per_second": 17.494, |
|
"eval_steps_per_second": 4.374, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0003, |
|
"loss": 1.482, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 0.8674991726875305, |
|
"eval_phone_accuracy": 0.7458370408033558, |
|
"eval_runtime": 10.836, |
|
"eval_samples_per_second": 18.457, |
|
"eval_steps_per_second": 4.614, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00026656263932233615, |
|
"loss": 1.0056, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 0.5736156702041626, |
|
"eval_phone_accuracy": 0.8264268463200711, |
|
"eval_runtime": 10.9515, |
|
"eval_samples_per_second": 18.262, |
|
"eval_steps_per_second": 4.566, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002331252786446723, |
|
"loss": 0.7876, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 0.43626296520233154, |
|
"eval_phone_accuracy": 0.8651752044404898, |
|
"eval_runtime": 11.1035, |
|
"eval_samples_per_second": 18.012, |
|
"eval_steps_per_second": 4.503, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00019968791796700847, |
|
"loss": 0.6745, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 0.3738095760345459, |
|
"eval_phone_accuracy": 0.8828545400618618, |
|
"eval_runtime": 10.7535, |
|
"eval_samples_per_second": 18.599, |
|
"eval_steps_per_second": 4.65, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0001662505572893446, |
|
"loss": 0.5965, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 0.3428588807582855, |
|
"eval_phone_accuracy": 0.8905342993940935, |
|
"eval_runtime": 10.807, |
|
"eval_samples_per_second": 18.507, |
|
"eval_steps_per_second": 4.627, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00013281319661168078, |
|
"loss": 0.5429, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 0.29903942346572876, |
|
"eval_phone_accuracy": 0.9031926613279099, |
|
"eval_runtime": 11.2035, |
|
"eval_samples_per_second": 17.852, |
|
"eval_steps_per_second": 4.463, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 9.937583593401693e-05, |
|
"loss": 0.5022, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 0.2947227954864502, |
|
"eval_phone_accuracy": 0.9031185119274607, |
|
"eval_runtime": 10.8322, |
|
"eval_samples_per_second": 18.464, |
|
"eval_steps_per_second": 4.616, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 6.593847525635309e-05, |
|
"loss": 0.4767, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 0.2698778808116913, |
|
"eval_phone_accuracy": 0.9127261556713698, |
|
"eval_runtime": 11.228, |
|
"eval_samples_per_second": 17.813, |
|
"eval_steps_per_second": 4.453, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.2501114578689255e-05, |
|
"loss": 0.4565, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 0.2591952979564667, |
|
"eval_phone_accuracy": 0.9146540400830473, |
|
"eval_runtime": 11.459, |
|
"eval_samples_per_second": 17.454, |
|
"eval_steps_per_second": 4.363, |
|
"step": 5000 |
|
} |
|
], |
|
"max_steps": 5486, |
|
"num_train_epochs": 2, |
|
"total_flos": 1.8035219197147087e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|