|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.11284046692607, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1556420233463035, |
|
"grad_norm": 5.363790512084961, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 2.5426, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.311284046692607, |
|
"grad_norm": 5.297606468200684, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 2.4551, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.4669260700389105, |
|
"grad_norm": 1.6734049320220947, |
|
"learning_rate": 9.999999999999999e-06, |
|
"loss": 1.5765, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.622568093385214, |
|
"grad_norm": 0.5996205806732178, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.9482, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.7782101167315175, |
|
"grad_norm": 0.5326632857322693, |
|
"learning_rate": 1.4994303528285384e-05, |
|
"loss": 0.7767, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7782101167315175, |
|
"eval_loss": 0.7420370578765869, |
|
"eval_runtime": 75.0351, |
|
"eval_samples_per_second": 3.052, |
|
"eval_steps_per_second": 1.533, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.933852140077821, |
|
"grad_norm": 0.47296807169914246, |
|
"learning_rate": 1.4948783661087592e-05, |
|
"loss": 0.6917, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.0894941634241244, |
|
"grad_norm": 0.5496929883956909, |
|
"learning_rate": 1.485802041113141e-05, |
|
"loss": 0.5968, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.245136186770428, |
|
"grad_norm": 0.5656551122665405, |
|
"learning_rate": 1.4722565067948796e-05, |
|
"loss": 0.5096, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.4007782101167314, |
|
"grad_norm": 0.7151289582252502, |
|
"learning_rate": 1.454324037767081e-05, |
|
"loss": 0.5053, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.556420233463035, |
|
"grad_norm": 0.7926196455955505, |
|
"learning_rate": 1.4321135545726069e-05, |
|
"loss": 0.4193, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.556420233463035, |
|
"eval_loss": 0.4160374701023102, |
|
"eval_runtime": 75.0144, |
|
"eval_samples_per_second": 3.053, |
|
"eval_steps_per_second": 1.533, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.7120622568093387, |
|
"grad_norm": 0.7219254970550537, |
|
"learning_rate": 1.4057599621084365e-05, |
|
"loss": 0.3695, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.867704280155642, |
|
"grad_norm": 0.7828359007835388, |
|
"learning_rate": 1.3754233302229055e-05, |
|
"loss": 0.3554, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.0233463035019454, |
|
"grad_norm": 0.6474707126617432, |
|
"learning_rate": 1.3412879214628194e-05, |
|
"loss": 0.3115, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.178988326848249, |
|
"grad_norm": 0.8858464360237122, |
|
"learning_rate": 1.3035610718758362e-05, |
|
"loss": 0.2754, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.3346303501945527, |
|
"grad_norm": 0.6484001278877258, |
|
"learning_rate": 1.2624719316660416e-05, |
|
"loss": 0.2815, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.3346303501945527, |
|
"eval_loss": 0.3077065050601959, |
|
"eval_runtime": 74.9901, |
|
"eval_samples_per_second": 3.054, |
|
"eval_steps_per_second": 1.534, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.490272373540856, |
|
"grad_norm": 0.9611061811447144, |
|
"learning_rate": 1.218270073351891e-05, |
|
"loss": 0.2641, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.6459143968871595, |
|
"grad_norm": 1.0954933166503906, |
|
"learning_rate": 1.1712239758804626e-05, |
|
"loss": 0.2675, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.801556420233463, |
|
"grad_norm": 0.7623236775398254, |
|
"learning_rate": 1.1216193939054056e-05, |
|
"loss": 0.26, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.9571984435797667, |
|
"grad_norm": 0.8693030476570129, |
|
"learning_rate": 1.0697576221334781e-05, |
|
"loss": 0.2494, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.11284046692607, |
|
"grad_norm": 0.9107272028923035, |
|
"learning_rate": 1.0159536652819018e-05, |
|
"loss": 0.2131, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.11284046692607, |
|
"eval_loss": 0.26981863379478455, |
|
"eval_runtime": 75.23, |
|
"eval_samples_per_second": 3.044, |
|
"eval_steps_per_second": 1.529, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 448, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.9392870840113562e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|