|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 120.0, |
|
"eval_steps": 17748, |
|
"global_step": 177480, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.5154340267181396, |
|
"learning_rate": 9.000563443768313e-06, |
|
"loss": 0.2626, |
|
"step": 17748 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 6.236159801483154, |
|
"eval_runtime": 32.4206, |
|
"eval_samples_per_second": 299.655, |
|
"eval_steps_per_second": 6.817, |
|
"step": 17748 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 0.006092763505876064, |
|
"learning_rate": 8.001014198782963e-06, |
|
"loss": 0.057, |
|
"step": 35496 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 6.354074954986572, |
|
"eval_runtime": 32.2885, |
|
"eval_samples_per_second": 300.881, |
|
"eval_steps_per_second": 6.845, |
|
"step": 35496 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"grad_norm": 0.002155415015295148, |
|
"learning_rate": 7.0013522650439495e-06, |
|
"loss": 0.0428, |
|
"step": 53244 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 6.089537620544434, |
|
"eval_runtime": 30.7505, |
|
"eval_samples_per_second": 315.93, |
|
"eval_steps_per_second": 7.187, |
|
"step": 53244 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"grad_norm": 0.05033240467309952, |
|
"learning_rate": 6.001746675681768e-06, |
|
"loss": 0.0359, |
|
"step": 70992 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 6.1070780754089355, |
|
"eval_runtime": 30.9274, |
|
"eval_samples_per_second": 314.123, |
|
"eval_steps_per_second": 7.146, |
|
"step": 70992 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 0.0004564746341202408, |
|
"learning_rate": 5.002253775073248e-06, |
|
"loss": 0.0312, |
|
"step": 88740 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_loss": 5.999448776245117, |
|
"eval_runtime": 30.7851, |
|
"eval_samples_per_second": 315.575, |
|
"eval_steps_per_second": 7.179, |
|
"step": 88740 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"grad_norm": 0.007316610310226679, |
|
"learning_rate": 4.002704530087897e-06, |
|
"loss": 0.0287, |
|
"step": 106488 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_loss": 5.854261875152588, |
|
"eval_runtime": 30.4575, |
|
"eval_samples_per_second": 318.969, |
|
"eval_steps_per_second": 7.256, |
|
"step": 106488 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"grad_norm": 0.5285100936889648, |
|
"learning_rate": 3.0032116294793783e-06, |
|
"loss": 0.0262, |
|
"step": 124236 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_loss": 5.759538650512695, |
|
"eval_runtime": 30.8128, |
|
"eval_samples_per_second": 315.291, |
|
"eval_steps_per_second": 7.172, |
|
"step": 124236 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"grad_norm": 0.014891779981553555, |
|
"learning_rate": 2.003718728870859e-06, |
|
"loss": 0.0246, |
|
"step": 141984 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_loss": 5.716710567474365, |
|
"eval_runtime": 30.804, |
|
"eval_samples_per_second": 315.381, |
|
"eval_steps_per_second": 7.174, |
|
"step": 141984 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"grad_norm": 0.26408958435058594, |
|
"learning_rate": 1.0041694838855084e-06, |
|
"loss": 0.0227, |
|
"step": 159732 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_loss": 5.684043884277344, |
|
"eval_runtime": 32.4092, |
|
"eval_samples_per_second": 299.76, |
|
"eval_steps_per_second": 6.819, |
|
"step": 159732 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"grad_norm": 0.0010831266408786178, |
|
"learning_rate": 4.676583276988957e-09, |
|
"loss": 0.0215, |
|
"step": 177480 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_loss": 5.624764919281006, |
|
"eval_runtime": 32.2837, |
|
"eval_samples_per_second": 300.926, |
|
"eval_steps_per_second": 6.846, |
|
"step": 177480 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"step": 177480, |
|
"total_flos": 2.803155588736635e+18, |
|
"train_loss": 0.05530680634294621, |
|
"train_runtime": 59846.6408, |
|
"train_samples_per_second": 118.561, |
|
"train_steps_per_second": 2.966 |
|
} |
|
], |
|
"logging_steps": 17748, |
|
"max_steps": 177480, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 120, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.803155588736635e+18, |
|
"train_batch_size": 40, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|