|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9975812953507122, |
|
"eval_steps": 500, |
|
"global_step": 232, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08599838753023381, |
|
"grad_norm": 1.4894707202911377, |
|
"learning_rate": 8.333333333333334e-05, |
|
"loss": 10.952, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.17199677506046762, |
|
"grad_norm": 0.6050359010696411, |
|
"learning_rate": 9.85470908713026e-05, |
|
"loss": 8.8962, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.25799516259070143, |
|
"grad_norm": 0.25468742847442627, |
|
"learning_rate": 9.278906361507238e-05, |
|
"loss": 7.7551, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.34399355012093524, |
|
"grad_norm": 0.44108685851097107, |
|
"learning_rate": 8.315613291203976e-05, |
|
"loss": 7.4342, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.42999193765116905, |
|
"grad_norm": 0.38378873467445374, |
|
"learning_rate": 7.052064027263786e-05, |
|
"loss": 7.2125, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5159903251814029, |
|
"grad_norm": 0.7682523131370544, |
|
"learning_rate": 5.602683401276615e-05, |
|
"loss": 7.0084, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6019887127116367, |
|
"grad_norm": 0.5268513560295105, |
|
"learning_rate": 4.0987248109304714e-05, |
|
"loss": 6.817, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6879871002418705, |
|
"grad_norm": 0.7004027366638184, |
|
"learning_rate": 2.6763841397811573e-05, |
|
"loss": 6.6429, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.7739854877721043, |
|
"grad_norm": 1.4404789209365845, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 6.4766, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8599838753023381, |
|
"grad_norm": 0.3456955552101135, |
|
"learning_rate": 5.727198717339511e-06, |
|
"loss": 6.3555, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9459822628325719, |
|
"grad_norm": 0.23901048302650452, |
|
"learning_rate": 8.190046526428242e-07, |
|
"loss": 6.3162, |
|
"step": 220 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 232, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.384639693215236e+18, |
|
"train_batch_size": 12, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|