|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.46153846153846156, |
|
"eval_steps": 10, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006153846153846154, |
|
"eval_loss": 1.489196538925171, |
|
"eval_runtime": 1.2747, |
|
"eval_samples_per_second": 89.432, |
|
"eval_steps_per_second": 4.707, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.03076923076923077, |
|
"grad_norm": 17.32219123840332, |
|
"learning_rate": 8.771929824561404e-07, |
|
"loss": 1.5722, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03076923076923077, |
|
"eval_loss": 1.3865292072296143, |
|
"eval_runtime": 1.1749, |
|
"eval_samples_per_second": 97.029, |
|
"eval_steps_per_second": 5.107, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06153846153846154, |
|
"grad_norm": 21.88077735900879, |
|
"learning_rate": 1.7543859649122807e-06, |
|
"loss": 1.3935, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06153846153846154, |
|
"eval_loss": 1.0996264219284058, |
|
"eval_runtime": 1.1706, |
|
"eval_samples_per_second": 97.386, |
|
"eval_steps_per_second": 5.126, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09230769230769231, |
|
"grad_norm": 17.395177841186523, |
|
"learning_rate": 2.631578947368421e-06, |
|
"loss": 1.0664, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09230769230769231, |
|
"eval_loss": 0.8006665706634521, |
|
"eval_runtime": 1.1747, |
|
"eval_samples_per_second": 97.044, |
|
"eval_steps_per_second": 5.108, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.12307692307692308, |
|
"grad_norm": 6.869284152984619, |
|
"learning_rate": 3.5087719298245615e-06, |
|
"loss": 0.7994, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12307692307692308, |
|
"eval_loss": 0.708875298500061, |
|
"eval_runtime": 1.1742, |
|
"eval_samples_per_second": 97.087, |
|
"eval_steps_per_second": 5.11, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"grad_norm": 6.4737162590026855, |
|
"learning_rate": 4.385964912280702e-06, |
|
"loss": 0.7751, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"eval_loss": 0.6778659224510193, |
|
"eval_runtime": 1.1722, |
|
"eval_samples_per_second": 97.257, |
|
"eval_steps_per_second": 5.119, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.18461538461538463, |
|
"grad_norm": 5.418182373046875, |
|
"learning_rate": 5.263157894736842e-06, |
|
"loss": 0.6203, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18461538461538463, |
|
"eval_loss": 0.6695793271064758, |
|
"eval_runtime": 1.1744, |
|
"eval_samples_per_second": 97.071, |
|
"eval_steps_per_second": 5.109, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2153846153846154, |
|
"grad_norm": 2.7423934936523438, |
|
"learning_rate": 6.140350877192983e-06, |
|
"loss": 0.767, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2153846153846154, |
|
"eval_loss": 0.6650205850601196, |
|
"eval_runtime": 1.1671, |
|
"eval_samples_per_second": 97.675, |
|
"eval_steps_per_second": 5.141, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.24615384615384617, |
|
"grad_norm": 2.572129726409912, |
|
"learning_rate": 7.017543859649123e-06, |
|
"loss": 0.6336, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.24615384615384617, |
|
"eval_loss": 0.6613836884498596, |
|
"eval_runtime": 1.1671, |
|
"eval_samples_per_second": 97.676, |
|
"eval_steps_per_second": 5.141, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.27692307692307694, |
|
"grad_norm": 4.622325420379639, |
|
"learning_rate": 7.894736842105265e-06, |
|
"loss": 0.631, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.27692307692307694, |
|
"eval_loss": 0.6617783904075623, |
|
"eval_runtime": 1.1744, |
|
"eval_samples_per_second": 97.071, |
|
"eval_steps_per_second": 5.109, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"grad_norm": 3.24354887008667, |
|
"learning_rate": 8.771929824561405e-06, |
|
"loss": 0.6086, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"eval_loss": 0.662232518196106, |
|
"eval_runtime": 1.172, |
|
"eval_samples_per_second": 97.27, |
|
"eval_steps_per_second": 5.119, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3384615384615385, |
|
"grad_norm": 3.5868513584136963, |
|
"learning_rate": 9.649122807017545e-06, |
|
"loss": 0.7057, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3384615384615385, |
|
"eval_loss": 0.6619779467582703, |
|
"eval_runtime": 1.165, |
|
"eval_samples_per_second": 97.856, |
|
"eval_steps_per_second": 5.15, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.36923076923076925, |
|
"grad_norm": 3.4894511699676514, |
|
"learning_rate": 1.0526315789473684e-05, |
|
"loss": 0.7385, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.36923076923076925, |
|
"eval_loss": 0.6673641800880432, |
|
"eval_runtime": 1.1682, |
|
"eval_samples_per_second": 97.588, |
|
"eval_steps_per_second": 5.136, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 5.117406845092773, |
|
"learning_rate": 1.1403508771929826e-05, |
|
"loss": 0.6533, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 0.6705042123794556, |
|
"eval_runtime": 1.1726, |
|
"eval_samples_per_second": 97.217, |
|
"eval_steps_per_second": 5.117, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.4307692307692308, |
|
"grad_norm": 3.9341437816619873, |
|
"learning_rate": 1.2280701754385966e-05, |
|
"loss": 0.7066, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.4307692307692308, |
|
"eval_loss": 0.6691470146179199, |
|
"eval_runtime": 1.1702, |
|
"eval_samples_per_second": 97.418, |
|
"eval_steps_per_second": 5.127, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.46153846153846156, |
|
"grad_norm": 3.4131579399108887, |
|
"learning_rate": 1.3157894736842108e-05, |
|
"loss": 0.6065, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.46153846153846156, |
|
"eval_loss": 0.6724759340286255, |
|
"eval_runtime": 1.1746, |
|
"eval_samples_per_second": 97.05, |
|
"eval_steps_per_second": 5.108, |
|
"step": 150 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2275, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 50, |
|
"total_flos": 9451377658953728.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|