thomnis's picture
Training in progress, step 3180
0d29477 verified
raw
history blame
4.91 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 3180,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9968553459119497,
"grad_norm": 0.9422912001609802,
"learning_rate": 0.00044485348269869936,
"loss": 0.2476,
"step": 317
},
{
"epoch": 1.0,
"eval_accuracy": 0.7641935483870967,
"eval_loss": 0.10522185266017914,
"eval_runtime": 5.3982,
"eval_samples_per_second": 574.263,
"eval_steps_per_second": 12.041,
"step": 318
},
{
"epoch": 1.9937106918238994,
"grad_norm": 0.2760656476020813,
"learning_rate": 0.0003955979626094617,
"loss": 0.0756,
"step": 634
},
{
"epoch": 2.0,
"eval_accuracy": 0.854516129032258,
"eval_loss": 0.06962967664003372,
"eval_runtime": 5.4277,
"eval_samples_per_second": 571.144,
"eval_steps_per_second": 11.976,
"step": 636
},
{
"epoch": 2.990566037735849,
"grad_norm": 1.5241812467575073,
"learning_rate": 0.0003463424425202239,
"loss": 0.0447,
"step": 951
},
{
"epoch": 3.0,
"eval_accuracy": 0.8735483870967742,
"eval_loss": 0.05896682292222977,
"eval_runtime": 5.3747,
"eval_samples_per_second": 576.777,
"eval_steps_per_second": 12.094,
"step": 954
},
{
"epoch": 3.9874213836477987,
"grad_norm": 0.3112484812736511,
"learning_rate": 0.0002970869224309861,
"loss": 0.0337,
"step": 1268
},
{
"epoch": 4.0,
"eval_accuracy": 0.9064516129032258,
"eval_loss": 0.04424069821834564,
"eval_runtime": 5.449,
"eval_samples_per_second": 568.916,
"eval_steps_per_second": 11.929,
"step": 1272
},
{
"epoch": 4.984276729559748,
"grad_norm": 0.06658962368965149,
"learning_rate": 0.00024783140234174836,
"loss": 0.0262,
"step": 1585
},
{
"epoch": 5.0,
"eval_accuracy": 0.9090322580645162,
"eval_loss": 0.04311687871813774,
"eval_runtime": 5.4423,
"eval_samples_per_second": 569.617,
"eval_steps_per_second": 11.944,
"step": 1590
},
{
"epoch": 5.981132075471698,
"grad_norm": 0.15654200315475464,
"learning_rate": 0.0001985758822525106,
"loss": 0.0223,
"step": 1902
},
{
"epoch": 6.0,
"eval_accuracy": 0.9141935483870968,
"eval_loss": 0.03970247507095337,
"eval_runtime": 5.399,
"eval_samples_per_second": 574.182,
"eval_steps_per_second": 12.039,
"step": 1908
},
{
"epoch": 6.977987421383648,
"grad_norm": 0.06671025604009628,
"learning_rate": 0.00014932036216327283,
"loss": 0.0194,
"step": 2219
},
{
"epoch": 7.0,
"eval_accuracy": 0.917741935483871,
"eval_loss": 0.03794796019792557,
"eval_runtime": 5.3981,
"eval_samples_per_second": 574.279,
"eval_steps_per_second": 12.041,
"step": 2226
},
{
"epoch": 7.9748427672955975,
"grad_norm": 0.06658264994621277,
"learning_rate": 0.00010006484207403508,
"loss": 0.0179,
"step": 2536
},
{
"epoch": 8.0,
"eval_accuracy": 0.9232258064516129,
"eval_loss": 0.03362081199884415,
"eval_runtime": 5.4142,
"eval_samples_per_second": 572.564,
"eval_steps_per_second": 12.005,
"step": 2544
},
{
"epoch": 8.971698113207546,
"grad_norm": 0.06578544527292252,
"learning_rate": 5.080932198479731e-05,
"loss": 0.0164,
"step": 2853
},
{
"epoch": 9.0,
"eval_accuracy": 0.92,
"eval_loss": 0.032231684774160385,
"eval_runtime": 5.4161,
"eval_samples_per_second": 572.368,
"eval_steps_per_second": 12.001,
"step": 2862
},
{
"epoch": 9.968553459119496,
"grad_norm": 0.0489405132830143,
"learning_rate": 1.5538018955595509e-06,
"loss": 0.0152,
"step": 3170
}
],
"logging_steps": 317,
"max_steps": 3180,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000000000.0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 825254092458012.0,
"train_batch_size": 48,
"trial_name": null,
"trial_params": {
"alpha": 0.7440020908825002,
"learning_rate": 0.0004941090027879372,
"lr_scheduler_type": "linear",
"num_train_epochs": 10,
"temperature": 13.346731795373474,
"weight_decay": 0.2513528053510888
}
}