|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.008939346533768381, |
|
"eval_steps": 10, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00017878693067536764, |
|
"eval_loss": 2.933220624923706, |
|
"eval_runtime": 233.474, |
|
"eval_samples_per_second": 10.091, |
|
"eval_steps_per_second": 5.046, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0008939346533768382, |
|
"grad_norm": 0.2832305431365967, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8622, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0017878693067536764, |
|
"grad_norm": 0.40609437227249146, |
|
"learning_rate": 0.0001, |
|
"loss": 2.7993, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0017878693067536764, |
|
"eval_loss": 2.906853437423706, |
|
"eval_runtime": 242.4442, |
|
"eval_samples_per_second": 9.718, |
|
"eval_steps_per_second": 4.859, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0026818039601305146, |
|
"grad_norm": 0.3924393951892853, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 2.764, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0035757386135073527, |
|
"grad_norm": 0.4617941379547119, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 2.7949, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0035757386135073527, |
|
"eval_loss": 2.8182547092437744, |
|
"eval_runtime": 241.8579, |
|
"eval_samples_per_second": 9.741, |
|
"eval_steps_per_second": 4.871, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0044696732668841905, |
|
"grad_norm": 0.5247169137001038, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 2.6913, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.005363607920261029, |
|
"grad_norm": 0.5109822750091553, |
|
"learning_rate": 5e-05, |
|
"loss": 2.7626, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.005363607920261029, |
|
"eval_loss": 2.7879366874694824, |
|
"eval_runtime": 241.7636, |
|
"eval_samples_per_second": 9.745, |
|
"eval_steps_per_second": 4.873, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.006257542573637867, |
|
"grad_norm": 0.4426691234111786, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 2.8036, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0071514772270147055, |
|
"grad_norm": 0.514265775680542, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 2.6892, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0071514772270147055, |
|
"eval_loss": 2.772698402404785, |
|
"eval_runtime": 241.7888, |
|
"eval_samples_per_second": 9.744, |
|
"eval_steps_per_second": 4.872, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.008045411880391543, |
|
"grad_norm": 0.6126759648323059, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 2.7549, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.008939346533768381, |
|
"grad_norm": 0.5537281632423401, |
|
"learning_rate": 0.0, |
|
"loss": 2.7579, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.008939346533768381, |
|
"eval_loss": 2.7696890830993652, |
|
"eval_runtime": 241.7278, |
|
"eval_samples_per_second": 9.747, |
|
"eval_steps_per_second": 4.873, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 13, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.088596299743232e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|