|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.918918918918919, |
|
"eval_steps": 500, |
|
"global_step": 81, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.36036036036036034, |
|
"grad_norm": 24.162766925724526, |
|
"learning_rate": 5e-06, |
|
"loss": 1.2352, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.7207207207207207, |
|
"grad_norm": 2.460851975181821, |
|
"learning_rate": 5e-06, |
|
"loss": 1.129, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.972972972972973, |
|
"eval_loss": 1.0856622457504272, |
|
"eval_runtime": 20.0808, |
|
"eval_samples_per_second": 37.249, |
|
"eval_steps_per_second": 0.598, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.0810810810810811, |
|
"grad_norm": 1.5697438122223701, |
|
"learning_rate": 5e-06, |
|
"loss": 1.1105, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.4414414414414414, |
|
"grad_norm": 1.4204201478767204, |
|
"learning_rate": 5e-06, |
|
"loss": 1.0347, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.8018018018018018, |
|
"grad_norm": 1.4140162476830664, |
|
"learning_rate": 5e-06, |
|
"loss": 1.015, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.981981981981982, |
|
"eval_loss": 1.0293084383010864, |
|
"eval_runtime": 19.208, |
|
"eval_samples_per_second": 38.942, |
|
"eval_steps_per_second": 0.625, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.1621621621621623, |
|
"grad_norm": 2.291804896225563, |
|
"learning_rate": 5e-06, |
|
"loss": 1.0103, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.5225225225225225, |
|
"grad_norm": 1.4769259269396882, |
|
"learning_rate": 5e-06, |
|
"loss": 0.9502, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.8828828828828827, |
|
"grad_norm": 1.4935265215970417, |
|
"learning_rate": 5e-06, |
|
"loss": 0.939, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.918918918918919, |
|
"eval_loss": 1.0073624849319458, |
|
"eval_runtime": 17.8833, |
|
"eval_samples_per_second": 41.827, |
|
"eval_steps_per_second": 0.671, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 2.918918918918919, |
|
"step": 81, |
|
"total_flos": 135468637224960.0, |
|
"train_loss": 1.051608243106324, |
|
"train_runtime": 2954.7854, |
|
"train_samples_per_second": 14.423, |
|
"train_steps_per_second": 0.027 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 81, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 135468637224960.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|