|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1933, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05173305742369374, |
|
"grad_norm": 0.20534886419773102, |
|
"learning_rate": 1.0362694300518135e-05, |
|
"loss": 2.372, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10346611484738748, |
|
"grad_norm": 0.3482538163661957, |
|
"learning_rate": 1.9999201340701767e-05, |
|
"loss": 2.2663, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1551991722710812, |
|
"grad_norm": 0.5800974369049072, |
|
"learning_rate": 1.981396800312055e-05, |
|
"loss": 2.1092, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.20693222969477496, |
|
"grad_norm": 0.8034837245941162, |
|
"learning_rate": 1.9309679502465225e-05, |
|
"loss": 1.9755, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2586652871184687, |
|
"grad_norm": 0.8570711016654968, |
|
"learning_rate": 1.8502730415298682e-05, |
|
"loss": 1.887, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3103983445421624, |
|
"grad_norm": 0.9559773206710815, |
|
"learning_rate": 1.741935490832257e-05, |
|
"loss": 1.8283, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3621314019658562, |
|
"grad_norm": 0.9662678837776184, |
|
"learning_rate": 1.6094773857429165e-05, |
|
"loss": 1.8393, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4138644593895499, |
|
"grad_norm": 1.23440682888031, |
|
"learning_rate": 1.4572049806032437e-05, |
|
"loss": 1.7, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4655975168132437, |
|
"grad_norm": 1.1646474599838257, |
|
"learning_rate": 1.2900686988340288e-05, |
|
"loss": 1.703, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5173305742369374, |
|
"grad_norm": 1.043607473373413, |
|
"learning_rate": 1.1135021931248375e-05, |
|
"loss": 1.6366, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5690636316606311, |
|
"grad_norm": 1.163309931755066, |
|
"learning_rate": 9.332456956890377e-06, |
|
"loss": 1.6961, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6207966890843248, |
|
"grad_norm": 1.1546854972839355, |
|
"learning_rate": 7.551594015228087e-06, |
|
"loss": 1.6418, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6725297465080187, |
|
"grad_norm": 1.1681387424468994, |
|
"learning_rate": 5.850329516366368e-06, |
|
"loss": 1.6815, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7242628039317124, |
|
"grad_norm": 1.1528677940368652, |
|
"learning_rate": 4.283972100189098e-06, |
|
"loss": 1.6221, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7759958613554061, |
|
"grad_norm": 1.3576257228851318, |
|
"learning_rate": 2.903444535212738e-06, |
|
"loss": 1.6389, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8277289187790998, |
|
"grad_norm": 1.1139311790466309, |
|
"learning_rate": 1.7536282034870066e-06, |
|
"loss": 1.6398, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8794619762027935, |
|
"grad_norm": 1.1810859441757202, |
|
"learning_rate": 8.71903992855374e-07, |
|
"loss": 1.678, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9311950336264874, |
|
"grad_norm": 1.1703745126724243, |
|
"learning_rate": 2.8693703262333894e-07, |
|
"loss": 1.6531, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9829280910501811, |
|
"grad_norm": 1.3099007606506348, |
|
"learning_rate": 1.774478125728729e-08, |
|
"loss": 1.653, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.5814313888549805, |
|
"eval_runtime": 31.0467, |
|
"eval_samples_per_second": 13.464, |
|
"eval_steps_per_second": 1.707, |
|
"step": 1933 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1933, |
|
"total_flos": 3.52000008192e+16, |
|
"train_loss": 1.7970823928351485, |
|
"train_runtime": 655.195, |
|
"train_samples_per_second": 5.899, |
|
"train_steps_per_second": 2.95 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 1933, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.52000008192e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|