|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9823874755381605, |
|
"eval_steps": 500, |
|
"global_step": 381, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.28035762906074524, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 1.4971, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.6015884876251221, |
|
"learning_rate": 2.9035087719298245e-05, |
|
"loss": 1.2201, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.7144075036048889, |
|
"learning_rate": 2.6842105263157896e-05, |
|
"loss": 1.1631, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.8597971200942993, |
|
"learning_rate": 2.4649122807017547e-05, |
|
"loss": 1.0016, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.7555738091468811, |
|
"learning_rate": 2.245614035087719e-05, |
|
"loss": 1.029, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.8726145029067993, |
|
"learning_rate": 2.0263157894736842e-05, |
|
"loss": 0.9495, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.8828539252281189, |
|
"learning_rate": 1.8070175438596493e-05, |
|
"loss": 0.9688, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.9864718914031982, |
|
"learning_rate": 1.5877192982456144e-05, |
|
"loss": 0.9321, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.8643972277641296, |
|
"learning_rate": 1.368421052631579e-05, |
|
"loss": 0.9408, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.9563308954238892, |
|
"learning_rate": 1.1491228070175439e-05, |
|
"loss": 0.8721, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.9510623812675476, |
|
"learning_rate": 9.298245614035088e-06, |
|
"loss": 0.8556, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 1.0566192865371704, |
|
"learning_rate": 7.105263157894737e-06, |
|
"loss": 0.8947, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.9196553230285645, |
|
"learning_rate": 4.912280701754386e-06, |
|
"loss": 0.7975, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 0.9796131253242493, |
|
"learning_rate": 2.719298245614035e-06, |
|
"loss": 0.8946, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 1.0158289670944214, |
|
"learning_rate": 5.263157894736842e-07, |
|
"loss": 0.837, |
|
"step": 375 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 381, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 6.235021019027866e+16, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|