|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.4608, |
|
"global_step": 1500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 10.7159, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 7.062511920928955, |
|
"eval_runtime": 16.5468, |
|
"eval_samples_per_second": 604.347, |
|
"eval_steps_per_second": 37.772, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 9.65e-06, |
|
"loss": 5.0371, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 2.8384995460510254, |
|
"eval_runtime": 16.0338, |
|
"eval_samples_per_second": 623.684, |
|
"eval_steps_per_second": 38.98, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.4449999999999999e-05, |
|
"loss": 2.4865, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 1.1602892875671387, |
|
"eval_runtime": 16.2241, |
|
"eval_samples_per_second": 616.367, |
|
"eval_steps_per_second": 38.523, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 1.93e-05, |
|
"loss": 1.0806, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_loss": 0.7614782452583313, |
|
"eval_runtime": 16.2665, |
|
"eval_samples_per_second": 614.761, |
|
"eval_steps_per_second": 38.423, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 2.415e-05, |
|
"loss": 0.8412, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_loss": 0.726601243019104, |
|
"eval_runtime": 16.7852, |
|
"eval_samples_per_second": 595.764, |
|
"eval_steps_per_second": 37.235, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.802, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_loss": 0.7105833292007446, |
|
"eval_runtime": 16.763, |
|
"eval_samples_per_second": 596.553, |
|
"eval_steps_per_second": 37.285, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 3.385e-05, |
|
"loss": 0.7776, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_loss": 0.6986653804779053, |
|
"eval_runtime": 16.7875, |
|
"eval_samples_per_second": 595.68, |
|
"eval_steps_per_second": 37.23, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 3.8700000000000006e-05, |
|
"loss": 0.7568, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_loss": 0.687833309173584, |
|
"eval_runtime": 16.4861, |
|
"eval_samples_per_second": 606.571, |
|
"eval_steps_per_second": 37.911, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 4.355e-05, |
|
"loss": 0.7382, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_loss": 0.6793721914291382, |
|
"eval_runtime": 16.6577, |
|
"eval_samples_per_second": 600.323, |
|
"eval_steps_per_second": 37.52, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 4.8400000000000004e-05, |
|
"loss": 0.7202, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_loss": 0.6695303320884705, |
|
"eval_runtime": 16.3181, |
|
"eval_samples_per_second": 612.817, |
|
"eval_steps_per_second": 38.301, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 4.6542553191489364e-05, |
|
"loss": 0.7022, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_loss": 0.6605609059333801, |
|
"eval_runtime": 16.748, |
|
"eval_samples_per_second": 597.086, |
|
"eval_steps_per_second": 37.318, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 4.138297872340426e-05, |
|
"loss": 0.6844, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_loss": 0.6539720892906189, |
|
"eval_runtime": 16.7037, |
|
"eval_samples_per_second": 598.671, |
|
"eval_steps_per_second": 37.417, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 3.622340425531915e-05, |
|
"loss": 0.669, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_loss": 0.6471053957939148, |
|
"eval_runtime": 16.5172, |
|
"eval_samples_per_second": 605.43, |
|
"eval_steps_per_second": 37.839, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 3.1063829787234046e-05, |
|
"loss": 0.6562, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_loss": 0.6417160034179688, |
|
"eval_runtime": 16.0822, |
|
"eval_samples_per_second": 621.804, |
|
"eval_steps_per_second": 38.863, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 2.590425531914894e-05, |
|
"loss": 0.6453, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_loss": 0.6380994915962219, |
|
"eval_runtime": 16.4132, |
|
"eval_samples_per_second": 609.265, |
|
"eval_steps_per_second": 38.079, |
|
"step": 1455 |
|
} |
|
], |
|
"max_steps": 1940, |
|
"num_train_epochs": 20, |
|
"total_flos": 5.89188339597312e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|