|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.84, |
|
"eval_steps": 500, |
|
"global_step": 72, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5e-05, |
|
"loss": 2.4616, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.000125, |
|
"loss": 2.2926, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001995184726672197, |
|
"loss": 1.0788, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00019415440651830208, |
|
"loss": 0.3285, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 0.25046542286872864, |
|
"eval_runtime": 4.9708, |
|
"eval_samples_per_second": 20.118, |
|
"eval_steps_per_second": 2.615, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00018314696123025454, |
|
"loss": 0.2382, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016715589548470185, |
|
"loss": 0.2235, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0001471396736825998, |
|
"loss": 0.2044, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0001242980179903264, |
|
"loss": 0.1944, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 0.21891021728515625, |
|
"eval_runtime": 4.0841, |
|
"eval_samples_per_second": 24.485, |
|
"eval_steps_per_second": 3.183, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1881, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 7.570198200967362e-05, |
|
"loss": 0.1768, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.286032631740023e-05, |
|
"loss": 0.1628, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 3.2844104515298155e-05, |
|
"loss": 0.1767, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 0.2127072662115097, |
|
"eval_runtime": 4.0548, |
|
"eval_samples_per_second": 24.662, |
|
"eval_steps_per_second": 3.206, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.6853038769745467e-05, |
|
"loss": 0.1663, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 5.8455934816979305e-06, |
|
"loss": 0.1468, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 4.815273327803182e-07, |
|
"loss": 0.1591, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 0.21221186220645905, |
|
"eval_runtime": 4.0724, |
|
"eval_samples_per_second": 24.556, |
|
"eval_steps_per_second": 3.192, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"step": 72, |
|
"total_flos": 40669301473280.0, |
|
"train_loss": 0.4049788423710399, |
|
"train_runtime": 399.0811, |
|
"train_samples_per_second": 3.007, |
|
"train_steps_per_second": 0.18 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 72, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 40669301473280.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|