|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.028011204481792718, |
|
"eval_steps": 5, |
|
"global_step": 15, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0018674136321195146, |
|
"grad_norm": 11.52504825592041, |
|
"learning_rate": 2e-05, |
|
"loss": 8.6504, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0018674136321195146, |
|
"eval_loss": 8.316863059997559, |
|
"eval_runtime": 8.1714, |
|
"eval_samples_per_second": 27.658, |
|
"eval_steps_per_second": 13.829, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.003734827264239029, |
|
"grad_norm": 9.959823608398438, |
|
"learning_rate": 4e-05, |
|
"loss": 7.6632, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0056022408963585435, |
|
"grad_norm": 7.954950332641602, |
|
"learning_rate": 6e-05, |
|
"loss": 8.8727, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.007469654528478058, |
|
"grad_norm": 9.595186233520508, |
|
"learning_rate": 8e-05, |
|
"loss": 8.6758, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.009337068160597572, |
|
"grad_norm": 10.132702827453613, |
|
"learning_rate": 0.0001, |
|
"loss": 8.1186, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.009337068160597572, |
|
"eval_loss": 7.952444553375244, |
|
"eval_runtime": 7.5626, |
|
"eval_samples_per_second": 29.884, |
|
"eval_steps_per_second": 14.942, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.011204481792717087, |
|
"grad_norm": 8.388951301574707, |
|
"learning_rate": 0.00012, |
|
"loss": 7.557, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.013071895424836602, |
|
"grad_norm": 14.64582347869873, |
|
"learning_rate": 0.00014, |
|
"loss": 8.6447, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.014939309056956116, |
|
"grad_norm": 12.940552711486816, |
|
"learning_rate": 0.00016, |
|
"loss": 6.7169, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01680672268907563, |
|
"grad_norm": 13.50047779083252, |
|
"learning_rate": 0.00018, |
|
"loss": 5.8296, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.018674136321195144, |
|
"grad_norm": 16.245317459106445, |
|
"learning_rate": 0.0002, |
|
"loss": 6.6651, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.018674136321195144, |
|
"eval_loss": 5.606720924377441, |
|
"eval_runtime": 7.6047, |
|
"eval_samples_per_second": 29.718, |
|
"eval_steps_per_second": 14.859, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02054154995331466, |
|
"grad_norm": 14.252686500549316, |
|
"learning_rate": 0.00019510565162951537, |
|
"loss": 5.4462, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.022408963585434174, |
|
"grad_norm": 13.769002914428711, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 5.9482, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02427637721755369, |
|
"grad_norm": 14.226924896240234, |
|
"learning_rate": 0.00015877852522924732, |
|
"loss": 5.444, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.026143790849673203, |
|
"grad_norm": 12.833412170410156, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 4.164, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.028011204481792718, |
|
"grad_norm": 29.60838508605957, |
|
"learning_rate": 0.0001, |
|
"loss": 5.389, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.028011204481792718, |
|
"eval_loss": 5.058675289154053, |
|
"eval_runtime": 7.6248, |
|
"eval_samples_per_second": 29.64, |
|
"eval_steps_per_second": 14.82, |
|
"step": 15 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1451583854346240.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|