thomnis's picture
Training in progress, step 4770
0b40aaf verified
raw
history blame
7.02 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"eval_steps": 500,
"global_step": 4770,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9968553459119497,
"grad_norm": 0.32578566670417786,
"learning_rate": 0.0002396304976085577,
"loss": 0.1826,
"step": 317
},
{
"epoch": 1.0,
"eval_accuracy": 0.8974193548387097,
"eval_loss": 0.05033092945814133,
"eval_runtime": 5.4105,
"eval_samples_per_second": 572.963,
"eval_steps_per_second": 12.014,
"step": 318
},
{
"epoch": 1.9937106918238994,
"grad_norm": 0.4084835350513458,
"learning_rate": 0.0002225716905701762,
"loss": 0.0428,
"step": 634
},
{
"epoch": 2.0,
"eval_accuracy": 0.9219354838709677,
"eval_loss": 0.03651176020503044,
"eval_runtime": 5.4732,
"eval_samples_per_second": 566.398,
"eval_steps_per_second": 11.876,
"step": 636
},
{
"epoch": 2.990566037735849,
"grad_norm": 0.201192244887352,
"learning_rate": 0.00020551288353179472,
"loss": 0.0307,
"step": 951
},
{
"epoch": 3.0,
"eval_accuracy": 0.9329032258064516,
"eval_loss": 0.029978184029459953,
"eval_runtime": 5.4216,
"eval_samples_per_second": 571.786,
"eval_steps_per_second": 11.989,
"step": 954
},
{
"epoch": 3.9874213836477987,
"grad_norm": 0.1681308001279831,
"learning_rate": 0.0001884540764934132,
"loss": 0.0246,
"step": 1268
},
{
"epoch": 4.0,
"eval_accuracy": 0.9345161290322581,
"eval_loss": 0.02788924239575863,
"eval_runtime": 5.388,
"eval_samples_per_second": 575.348,
"eval_steps_per_second": 12.064,
"step": 1272
},
{
"epoch": 4.984276729559748,
"grad_norm": 0.07124509662389755,
"learning_rate": 0.00017139526945503171,
"loss": 0.0214,
"step": 1585
},
{
"epoch": 5.0,
"eval_accuracy": 0.937741935483871,
"eval_loss": 0.025299660861492157,
"eval_runtime": 5.4307,
"eval_samples_per_second": 570.832,
"eval_steps_per_second": 11.969,
"step": 1590
},
{
"epoch": 5.981132075471698,
"grad_norm": 0.09666649252176285,
"learning_rate": 0.00015433646241665023,
"loss": 0.0199,
"step": 1902
},
{
"epoch": 6.0,
"eval_accuracy": 0.94,
"eval_loss": 0.02704489417374134,
"eval_runtime": 5.448,
"eval_samples_per_second": 569.011,
"eval_steps_per_second": 11.931,
"step": 1908
},
{
"epoch": 6.977987421383648,
"grad_norm": 0.06059594824910164,
"learning_rate": 0.00013727765537826874,
"loss": 0.0188,
"step": 2219
},
{
"epoch": 7.0,
"eval_accuracy": 0.9435483870967742,
"eval_loss": 0.023246513679623604,
"eval_runtime": 5.3993,
"eval_samples_per_second": 574.153,
"eval_steps_per_second": 12.039,
"step": 2226
},
{
"epoch": 7.9748427672955975,
"grad_norm": 0.06167187541723251,
"learning_rate": 0.00012021884833988725,
"loss": 0.0172,
"step": 2536
},
{
"epoch": 8.0,
"eval_accuracy": 0.9435483870967742,
"eval_loss": 0.02241097390651703,
"eval_runtime": 5.4121,
"eval_samples_per_second": 572.788,
"eval_steps_per_second": 12.01,
"step": 2544
},
{
"epoch": 8.971698113207546,
"grad_norm": 0.06807317584753036,
"learning_rate": 0.00010316004130150575,
"loss": 0.016,
"step": 2853
},
{
"epoch": 9.0,
"eval_accuracy": 0.9432258064516129,
"eval_loss": 0.022413650527596474,
"eval_runtime": 5.4391,
"eval_samples_per_second": 569.951,
"eval_steps_per_second": 11.951,
"step": 2862
},
{
"epoch": 9.968553459119496,
"grad_norm": 0.05246179923415184,
"learning_rate": 8.610123426312426e-05,
"loss": 0.0151,
"step": 3170
},
{
"epoch": 10.0,
"eval_accuracy": 0.9483870967741935,
"eval_loss": 0.02093937061727047,
"eval_runtime": 5.4466,
"eval_samples_per_second": 569.163,
"eval_steps_per_second": 11.934,
"step": 3180
},
{
"epoch": 10.965408805031446,
"grad_norm": 0.06324920058250427,
"learning_rate": 6.904242722474276e-05,
"loss": 0.0143,
"step": 3487
},
{
"epoch": 11.0,
"eval_accuracy": 0.9474193548387096,
"eval_loss": 0.020307855680584908,
"eval_runtime": 5.4319,
"eval_samples_per_second": 570.701,
"eval_steps_per_second": 11.966,
"step": 3498
},
{
"epoch": 11.962264150943396,
"grad_norm": 0.049960751086473465,
"learning_rate": 5.198362018636127e-05,
"loss": 0.0137,
"step": 3804
},
{
"epoch": 12.0,
"eval_accuracy": 0.9458064516129032,
"eval_loss": 0.02009761333465576,
"eval_runtime": 5.415,
"eval_samples_per_second": 572.484,
"eval_steps_per_second": 12.004,
"step": 3816
},
{
"epoch": 12.959119496855346,
"grad_norm": 0.05622159317135811,
"learning_rate": 3.492481314797978e-05,
"loss": 0.0132,
"step": 4121
},
{
"epoch": 13.0,
"eval_accuracy": 0.9464516129032258,
"eval_loss": 0.019574137404561043,
"eval_runtime": 5.4482,
"eval_samples_per_second": 568.993,
"eval_steps_per_second": 11.93,
"step": 4134
},
{
"epoch": 13.955974842767295,
"grad_norm": 0.04851706698536873,
"learning_rate": 1.7866006109598282e-05,
"loss": 0.0127,
"step": 4438
},
{
"epoch": 14.0,
"eval_accuracy": 0.9467741935483871,
"eval_loss": 0.01893083192408085,
"eval_runtime": 5.4638,
"eval_samples_per_second": 567.371,
"eval_steps_per_second": 11.896,
"step": 4452
},
{
"epoch": 14.952830188679245,
"grad_norm": 0.04642177000641823,
"learning_rate": 8.071990712167899e-07,
"loss": 0.0123,
"step": 4755
}
],
"logging_steps": 317,
"max_steps": 4770,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 1000000000.0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1237283192012448.0,
"train_batch_size": 48,
"trial_name": null,
"trial_params": {
"alpha": 0.9580201221394028,
"learning_rate": 0.0002566893046469392,
"lr_scheduler_type": "linear",
"num_train_epochs": 15,
"temperature": 9.50870227107101,
"weight_decay": 0.06288851655927655
}
}