|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"eval_steps": 4350, |
|
"global_step": 43500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.00021520511654671282, |
|
"learning_rate": 9.000919540229886e-06, |
|
"loss": 0.48, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 3.651794195175171, |
|
"eval_runtime": 55.7113, |
|
"eval_samples_per_second": 311.66, |
|
"eval_steps_per_second": 7.09, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.00017535020015202463, |
|
"learning_rate": 8.001379310344829e-06, |
|
"loss": 0.1193, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 3.453643798828125, |
|
"eval_runtime": 55.7389, |
|
"eval_samples_per_second": 311.506, |
|
"eval_steps_per_second": 7.087, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 0.18407990038394928, |
|
"learning_rate": 7.00183908045977e-06, |
|
"loss": 0.0925, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 3.4169740676879883, |
|
"eval_runtime": 55.9578, |
|
"eval_samples_per_second": 310.287, |
|
"eval_steps_per_second": 7.059, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 8.286705269711092e-05, |
|
"learning_rate": 6.002298850574713e-06, |
|
"loss": 0.0827, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 3.3203234672546387, |
|
"eval_runtime": 55.9217, |
|
"eval_samples_per_second": 310.488, |
|
"eval_steps_per_second": 7.063, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 0.0003959204477723688, |
|
"learning_rate": 5.002758620689656e-06, |
|
"loss": 0.0755, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 3.196115732192993, |
|
"eval_runtime": 55.4125, |
|
"eval_samples_per_second": 313.341, |
|
"eval_steps_per_second": 7.128, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 7.149695011321455e-05, |
|
"learning_rate": 4.003218390804598e-06, |
|
"loss": 0.0685, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_loss": 3.110957145690918, |
|
"eval_runtime": 55.9729, |
|
"eval_samples_per_second": 310.204, |
|
"eval_steps_per_second": 7.057, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"grad_norm": 0.013322222046554089, |
|
"learning_rate": 3.0036781609195404e-06, |
|
"loss": 0.0664, |
|
"step": 30450 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_loss": 3.0240492820739746, |
|
"eval_runtime": 56.1145, |
|
"eval_samples_per_second": 309.421, |
|
"eval_steps_per_second": 7.039, |
|
"step": 30450 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 2.9004069801885635e-05, |
|
"learning_rate": 2.004137931034483e-06, |
|
"loss": 0.0621, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_loss": 2.9804065227508545, |
|
"eval_runtime": 55.7487, |
|
"eval_samples_per_second": 311.451, |
|
"eval_steps_per_second": 7.085, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"grad_norm": 0.00013569927250500768, |
|
"learning_rate": 1.0045977011494254e-06, |
|
"loss": 0.0596, |
|
"step": 39150 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_loss": 2.895709276199341, |
|
"eval_runtime": 55.8728, |
|
"eval_samples_per_second": 310.76, |
|
"eval_steps_per_second": 7.07, |
|
"step": 39150 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"grad_norm": 0.0004280584107618779, |
|
"learning_rate": 4.827586206896552e-09, |
|
"loss": 0.0576, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 2.854401111602783, |
|
"eval_runtime": 55.466, |
|
"eval_samples_per_second": 313.038, |
|
"eval_steps_per_second": 7.121, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 43500, |
|
"total_flos": 6.859464121840128e+17, |
|
"train_loss": 0.11642176906541846, |
|
"train_runtime": 15360.6557, |
|
"train_samples_per_second": 113.036, |
|
"train_steps_per_second": 2.832 |
|
} |
|
], |
|
"logging_steps": 4350, |
|
"max_steps": 43500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.859464121840128e+17, |
|
"train_batch_size": 40, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|