|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.012944774356402, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 6.472387178201e-05, |
|
"eval_loss": 1.764864444732666, |
|
"eval_runtime": 195.6761, |
|
"eval_samples_per_second": 33.249, |
|
"eval_steps_per_second": 16.624, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0006472387178201, |
|
"grad_norm": 0.4425320625305176, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4183, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0012944774356402, |
|
"grad_norm": 0.7192811369895935, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4582, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0019417161534603, |
|
"grad_norm": 0.7565455436706543, |
|
"learning_rate": 0.0002, |
|
"loss": 1.362, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0025889548712804, |
|
"grad_norm": 0.8094374537467957, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2267, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0032361935891005, |
|
"grad_norm": 1.1294043064117432, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1154, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0032361935891005, |
|
"eval_loss": 1.1239283084869385, |
|
"eval_runtime": 194.799, |
|
"eval_samples_per_second": 33.399, |
|
"eval_steps_per_second": 16.699, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0038834323069206, |
|
"grad_norm": 0.9267818927764893, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1523, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0045306710247407, |
|
"grad_norm": 1.0530893802642822, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0864, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0051779097425608, |
|
"grad_norm": 0.9379938244819641, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0639, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0058251484603809, |
|
"grad_norm": 0.6152510643005371, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9727, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.006472387178201, |
|
"grad_norm": 1.0862324237823486, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1407, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.006472387178201, |
|
"eval_loss": 1.0274646282196045, |
|
"eval_runtime": 194.761, |
|
"eval_samples_per_second": 33.405, |
|
"eval_steps_per_second": 16.703, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0071196258960211, |
|
"grad_norm": 0.9663441181182861, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0741, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0077668646138412, |
|
"grad_norm": 1.29120934009552, |
|
"learning_rate": 0.0002, |
|
"loss": 1.044, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0084141033316613, |
|
"grad_norm": 0.9238539338111877, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0563, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0090613420494814, |
|
"grad_norm": 1.1232694387435913, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0642, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0097085807673015, |
|
"grad_norm": 0.957047700881958, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0433, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0097085807673015, |
|
"eval_loss": 0.9908371567726135, |
|
"eval_runtime": 194.8244, |
|
"eval_samples_per_second": 33.394, |
|
"eval_steps_per_second": 16.697, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0103558194851216, |
|
"grad_norm": 0.8797596096992493, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1071, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0110030582029417, |
|
"grad_norm": 0.90439373254776, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0443, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0116502969207618, |
|
"grad_norm": 1.0083608627319336, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0347, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.0122975356385819, |
|
"grad_norm": 0.8776618838310242, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9837, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.012944774356402, |
|
"grad_norm": 0.9423546195030212, |
|
"learning_rate": 0.0002, |
|
"loss": 1.048, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.012944774356402, |
|
"eval_loss": 0.9568792581558228, |
|
"eval_runtime": 195.0298, |
|
"eval_samples_per_second": 33.359, |
|
"eval_steps_per_second": 16.68, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.751024089209242e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|