dzanbek's picture
Training in progress, step 20, checkpoint
ed3ca3d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.0006815122757398668,
"eval_steps": 2,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 3.407561378699334e-05,
"grad_norm": 2.638183116912842,
"learning_rate": 1e-05,
"loss": 44.3756,
"step": 1
},
{
"epoch": 3.407561378699334e-05,
"eval_loss": 11.093536376953125,
"eval_runtime": 113.9897,
"eval_samples_per_second": 108.405,
"eval_steps_per_second": 54.207,
"step": 1
},
{
"epoch": 6.815122757398668e-05,
"grad_norm": 2.178905963897705,
"learning_rate": 2e-05,
"loss": 44.2993,
"step": 2
},
{
"epoch": 6.815122757398668e-05,
"eval_loss": 11.093539237976074,
"eval_runtime": 114.5009,
"eval_samples_per_second": 107.921,
"eval_steps_per_second": 53.965,
"step": 2
},
{
"epoch": 0.00010222684136098001,
"grad_norm": 2.257598876953125,
"learning_rate": 3e-05,
"loss": 44.3175,
"step": 3
},
{
"epoch": 0.00013630245514797336,
"grad_norm": 3.2154581546783447,
"learning_rate": 4e-05,
"loss": 44.4066,
"step": 4
},
{
"epoch": 0.00013630245514797336,
"eval_loss": 11.092741012573242,
"eval_runtime": 114.161,
"eval_samples_per_second": 108.242,
"eval_steps_per_second": 54.125,
"step": 4
},
{
"epoch": 0.0001703780689349667,
"grad_norm": 2.7002158164978027,
"learning_rate": 5e-05,
"loss": 44.4089,
"step": 5
},
{
"epoch": 0.00020445368272196002,
"grad_norm": 2.5694777965545654,
"learning_rate": 6e-05,
"loss": 44.3066,
"step": 6
},
{
"epoch": 0.00020445368272196002,
"eval_loss": 11.091360092163086,
"eval_runtime": 114.2137,
"eval_samples_per_second": 108.192,
"eval_steps_per_second": 54.1,
"step": 6
},
{
"epoch": 0.00023852929650895337,
"grad_norm": 2.498781442642212,
"learning_rate": 7e-05,
"loss": 44.3137,
"step": 7
},
{
"epoch": 0.0002726049102959467,
"grad_norm": 2.4782936573028564,
"learning_rate": 8e-05,
"loss": 44.3997,
"step": 8
},
{
"epoch": 0.0002726049102959467,
"eval_loss": 11.08991527557373,
"eval_runtime": 114.0478,
"eval_samples_per_second": 108.349,
"eval_steps_per_second": 54.179,
"step": 8
},
{
"epoch": 0.00030668052408294006,
"grad_norm": 2.774634599685669,
"learning_rate": 9e-05,
"loss": 44.3353,
"step": 9
},
{
"epoch": 0.0003407561378699334,
"grad_norm": 2.3270585536956787,
"learning_rate": 0.0001,
"loss": 44.4153,
"step": 10
},
{
"epoch": 0.0003407561378699334,
"eval_loss": 11.087890625,
"eval_runtime": 114.1863,
"eval_samples_per_second": 108.218,
"eval_steps_per_second": 54.113,
"step": 10
},
{
"epoch": 0.0003748317516569267,
"grad_norm": 2.2423906326293945,
"learning_rate": 9.755282581475769e-05,
"loss": 44.3149,
"step": 11
},
{
"epoch": 0.00040890736544392005,
"grad_norm": 2.1097352504730225,
"learning_rate": 9.045084971874738e-05,
"loss": 44.4146,
"step": 12
},
{
"epoch": 0.00040890736544392005,
"eval_loss": 11.085128784179688,
"eval_runtime": 114.1984,
"eval_samples_per_second": 108.206,
"eval_steps_per_second": 54.108,
"step": 12
},
{
"epoch": 0.0004429829792309134,
"grad_norm": 2.6091725826263428,
"learning_rate": 7.938926261462366e-05,
"loss": 44.3838,
"step": 13
},
{
"epoch": 0.00047705859301790674,
"grad_norm": 2.9170174598693848,
"learning_rate": 6.545084971874738e-05,
"loss": 44.4147,
"step": 14
},
{
"epoch": 0.00047705859301790674,
"eval_loss": 11.082711219787598,
"eval_runtime": 114.2172,
"eval_samples_per_second": 108.189,
"eval_steps_per_second": 54.099,
"step": 14
},
{
"epoch": 0.0005111342068049,
"grad_norm": 2.4831223487854004,
"learning_rate": 5e-05,
"loss": 44.2988,
"step": 15
},
{
"epoch": 0.0005452098205918934,
"grad_norm": 2.7560153007507324,
"learning_rate": 3.4549150281252636e-05,
"loss": 44.2967,
"step": 16
},
{
"epoch": 0.0005452098205918934,
"eval_loss": 11.08093547821045,
"eval_runtime": 114.1435,
"eval_samples_per_second": 108.258,
"eval_steps_per_second": 54.134,
"step": 16
},
{
"epoch": 0.0005792854343788867,
"grad_norm": 2.718780040740967,
"learning_rate": 2.061073738537635e-05,
"loss": 44.2053,
"step": 17
},
{
"epoch": 0.0006133610481658801,
"grad_norm": 2.361675262451172,
"learning_rate": 9.549150281252633e-06,
"loss": 44.2844,
"step": 18
},
{
"epoch": 0.0006133610481658801,
"eval_loss": 11.079801559448242,
"eval_runtime": 114.2329,
"eval_samples_per_second": 108.174,
"eval_steps_per_second": 54.091,
"step": 18
},
{
"epoch": 0.0006474366619528734,
"grad_norm": 2.549006938934326,
"learning_rate": 2.4471741852423237e-06,
"loss": 44.2642,
"step": 19
},
{
"epoch": 0.0006815122757398668,
"grad_norm": 2.272350549697876,
"learning_rate": 0.0,
"loss": 44.2192,
"step": 20
},
{
"epoch": 0.0006815122757398668,
"eval_loss": 11.079532623291016,
"eval_runtime": 114.2157,
"eval_samples_per_second": 108.19,
"eval_steps_per_second": 54.099,
"step": 20
}
],
"logging_steps": 1,
"max_steps": 20,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 2,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 55215390720.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}