eatbeans2's picture
Upload folder using huggingface_hub
5fcbb28
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 27.272727272727273,
"global_step": 300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.45,
"learning_rate": 7e-06,
"loss": 2.6772,
"step": 5
},
{
"epoch": 0.45,
"eval_loss": 2.7022464275360107,
"eval_runtime": 0.7068,
"eval_samples_per_second": 8.489,
"eval_steps_per_second": 1.415,
"step": 5
},
{
"epoch": 0.91,
"learning_rate": 7e-06,
"loss": 2.8149,
"step": 10
},
{
"epoch": 0.91,
"eval_loss": 2.6998846530914307,
"eval_runtime": 0.7138,
"eval_samples_per_second": 8.406,
"eval_steps_per_second": 1.401,
"step": 10
},
{
"epoch": 1.36,
"learning_rate": 7e-06,
"loss": 2.8494,
"step": 15
},
{
"epoch": 1.36,
"eval_loss": 2.6970536708831787,
"eval_runtime": 0.7302,
"eval_samples_per_second": 8.216,
"eval_steps_per_second": 1.369,
"step": 15
},
{
"epoch": 1.82,
"learning_rate": 7e-06,
"loss": 2.7035,
"step": 20
},
{
"epoch": 1.82,
"eval_loss": 2.6939141750335693,
"eval_runtime": 0.7129,
"eval_samples_per_second": 8.416,
"eval_steps_per_second": 1.403,
"step": 20
},
{
"epoch": 2.27,
"learning_rate": 7e-06,
"loss": 2.5667,
"step": 25
},
{
"epoch": 2.27,
"eval_loss": 2.6903278827667236,
"eval_runtime": 0.7707,
"eval_samples_per_second": 7.785,
"eval_steps_per_second": 1.298,
"step": 25
},
{
"epoch": 2.73,
"learning_rate": 7e-06,
"loss": 2.7766,
"step": 30
},
{
"epoch": 2.73,
"eval_loss": 2.6863253116607666,
"eval_runtime": 0.7245,
"eval_samples_per_second": 8.281,
"eval_steps_per_second": 1.38,
"step": 30
},
{
"epoch": 3.18,
"learning_rate": 7e-06,
"loss": 2.7507,
"step": 35
},
{
"epoch": 3.18,
"eval_loss": 2.6823132038116455,
"eval_runtime": 0.7678,
"eval_samples_per_second": 7.815,
"eval_steps_per_second": 1.302,
"step": 35
},
{
"epoch": 3.64,
"learning_rate": 7e-06,
"loss": 2.7033,
"step": 40
},
{
"epoch": 3.64,
"eval_loss": 2.677713632583618,
"eval_runtime": 0.7464,
"eval_samples_per_second": 8.038,
"eval_steps_per_second": 1.34,
"step": 40
},
{
"epoch": 4.09,
"learning_rate": 7e-06,
"loss": 2.714,
"step": 45
},
{
"epoch": 4.09,
"eval_loss": 2.6728098392486572,
"eval_runtime": 0.7176,
"eval_samples_per_second": 8.361,
"eval_steps_per_second": 1.393,
"step": 45
},
{
"epoch": 4.55,
"learning_rate": 7e-06,
"loss": 2.7606,
"step": 50
},
{
"epoch": 4.55,
"eval_loss": 2.6675455570220947,
"eval_runtime": 0.7365,
"eval_samples_per_second": 8.147,
"eval_steps_per_second": 1.358,
"step": 50
},
{
"epoch": 5.0,
"learning_rate": 7e-06,
"loss": 2.6694,
"step": 55
},
{
"epoch": 5.0,
"eval_loss": 2.6618385314941406,
"eval_runtime": 0.7145,
"eval_samples_per_second": 8.398,
"eval_steps_per_second": 1.4,
"step": 55
},
{
"epoch": 5.45,
"learning_rate": 7e-06,
"loss": 2.6477,
"step": 60
},
{
"epoch": 5.45,
"eval_loss": 2.6561715602874756,
"eval_runtime": 0.7766,
"eval_samples_per_second": 7.726,
"eval_steps_per_second": 1.288,
"step": 60
},
{
"epoch": 5.91,
"learning_rate": 7e-06,
"loss": 2.7613,
"step": 65
},
{
"epoch": 5.91,
"eval_loss": 2.649792432785034,
"eval_runtime": 0.7178,
"eval_samples_per_second": 8.359,
"eval_steps_per_second": 1.393,
"step": 65
},
{
"epoch": 6.36,
"learning_rate": 7e-06,
"loss": 2.6446,
"step": 70
},
{
"epoch": 6.36,
"eval_loss": 2.64331316947937,
"eval_runtime": 0.7629,
"eval_samples_per_second": 7.865,
"eval_steps_per_second": 1.311,
"step": 70
},
{
"epoch": 6.82,
"learning_rate": 7e-06,
"loss": 2.7039,
"step": 75
},
{
"epoch": 6.82,
"eval_loss": 2.6366422176361084,
"eval_runtime": 0.7438,
"eval_samples_per_second": 8.067,
"eval_steps_per_second": 1.345,
"step": 75
},
{
"epoch": 7.27,
"learning_rate": 7e-06,
"loss": 2.5376,
"step": 80
},
{
"epoch": 7.27,
"eval_loss": 2.6297147274017334,
"eval_runtime": 0.7738,
"eval_samples_per_second": 7.754,
"eval_steps_per_second": 1.292,
"step": 80
},
{
"epoch": 7.73,
"learning_rate": 7e-06,
"loss": 2.6866,
"step": 85
},
{
"epoch": 7.73,
"eval_loss": 2.622584581375122,
"eval_runtime": 0.7654,
"eval_samples_per_second": 7.839,
"eval_steps_per_second": 1.307,
"step": 85
},
{
"epoch": 8.18,
"learning_rate": 7e-06,
"loss": 2.6996,
"step": 90
},
{
"epoch": 8.18,
"eval_loss": 2.615074396133423,
"eval_runtime": 0.7534,
"eval_samples_per_second": 7.964,
"eval_steps_per_second": 1.327,
"step": 90
},
{
"epoch": 8.64,
"learning_rate": 7e-06,
"loss": 2.72,
"step": 95
},
{
"epoch": 8.64,
"eval_loss": 2.6072492599487305,
"eval_runtime": 0.7627,
"eval_samples_per_second": 7.867,
"eval_steps_per_second": 1.311,
"step": 95
},
{
"epoch": 9.09,
"learning_rate": 7e-06,
"loss": 2.4708,
"step": 100
},
{
"epoch": 9.09,
"eval_loss": 2.5988943576812744,
"eval_runtime": 0.7612,
"eval_samples_per_second": 7.882,
"eval_steps_per_second": 1.314,
"step": 100
},
{
"epoch": 9.55,
"learning_rate": 7e-06,
"loss": 2.5311,
"step": 105
},
{
"epoch": 9.55,
"eval_loss": 2.5901479721069336,
"eval_runtime": 0.7561,
"eval_samples_per_second": 7.935,
"eval_steps_per_second": 1.322,
"step": 105
},
{
"epoch": 10.0,
"learning_rate": 7e-06,
"loss": 2.6912,
"step": 110
},
{
"epoch": 10.0,
"eval_loss": 2.580698013305664,
"eval_runtime": 0.7525,
"eval_samples_per_second": 7.973,
"eval_steps_per_second": 1.329,
"step": 110
},
{
"epoch": 10.45,
"learning_rate": 7e-06,
"loss": 2.6089,
"step": 115
},
{
"epoch": 10.45,
"eval_loss": 2.5717146396636963,
"eval_runtime": 0.765,
"eval_samples_per_second": 7.843,
"eval_steps_per_second": 1.307,
"step": 115
},
{
"epoch": 10.91,
"learning_rate": 7e-06,
"loss": 2.4803,
"step": 120
},
{
"epoch": 10.91,
"eval_loss": 2.5612823963165283,
"eval_runtime": 0.7202,
"eval_samples_per_second": 8.331,
"eval_steps_per_second": 1.388,
"step": 120
},
{
"epoch": 11.36,
"learning_rate": 7e-06,
"loss": 2.6576,
"step": 125
},
{
"epoch": 11.36,
"eval_loss": 2.5491273403167725,
"eval_runtime": 0.7237,
"eval_samples_per_second": 8.291,
"eval_steps_per_second": 1.382,
"step": 125
},
{
"epoch": 11.82,
"learning_rate": 7e-06,
"loss": 2.4848,
"step": 130
},
{
"epoch": 11.82,
"eval_loss": 2.536881446838379,
"eval_runtime": 0.749,
"eval_samples_per_second": 8.011,
"eval_steps_per_second": 1.335,
"step": 130
},
{
"epoch": 12.27,
"learning_rate": 7e-06,
"loss": 2.4875,
"step": 135
},
{
"epoch": 12.27,
"eval_loss": 2.524549722671509,
"eval_runtime": 0.7273,
"eval_samples_per_second": 8.25,
"eval_steps_per_second": 1.375,
"step": 135
},
{
"epoch": 12.73,
"learning_rate": 7e-06,
"loss": 2.5707,
"step": 140
},
{
"epoch": 12.73,
"eval_loss": 2.511542558670044,
"eval_runtime": 0.7341,
"eval_samples_per_second": 8.174,
"eval_steps_per_second": 1.362,
"step": 140
},
{
"epoch": 13.18,
"learning_rate": 7e-06,
"loss": 2.3694,
"step": 145
},
{
"epoch": 13.18,
"eval_loss": 2.4980533123016357,
"eval_runtime": 0.733,
"eval_samples_per_second": 8.186,
"eval_steps_per_second": 1.364,
"step": 145
},
{
"epoch": 13.64,
"learning_rate": 7e-06,
"loss": 2.515,
"step": 150
},
{
"epoch": 13.64,
"eval_loss": 2.4852240085601807,
"eval_runtime": 0.7212,
"eval_samples_per_second": 8.319,
"eval_steps_per_second": 1.387,
"step": 150
},
{
"epoch": 14.09,
"learning_rate": 7e-06,
"loss": 2.5051,
"step": 155
},
{
"epoch": 14.09,
"eval_loss": 2.471349000930786,
"eval_runtime": 0.7219,
"eval_samples_per_second": 8.311,
"eval_steps_per_second": 1.385,
"step": 155
},
{
"epoch": 14.55,
"learning_rate": 7e-06,
"loss": 2.5314,
"step": 160
},
{
"epoch": 14.55,
"eval_loss": 2.456756830215454,
"eval_runtime": 0.7344,
"eval_samples_per_second": 8.17,
"eval_steps_per_second": 1.362,
"step": 160
},
{
"epoch": 15.0,
"learning_rate": 7e-06,
"loss": 2.3409,
"step": 165
},
{
"epoch": 15.0,
"eval_loss": 2.4444773197174072,
"eval_runtime": 0.7207,
"eval_samples_per_second": 8.326,
"eval_steps_per_second": 1.388,
"step": 165
},
{
"epoch": 15.45,
"learning_rate": 7e-06,
"loss": 2.4272,
"step": 170
},
{
"epoch": 15.45,
"eval_loss": 2.4356348514556885,
"eval_runtime": 0.7258,
"eval_samples_per_second": 8.267,
"eval_steps_per_second": 1.378,
"step": 170
},
{
"epoch": 15.91,
"learning_rate": 7e-06,
"loss": 2.4189,
"step": 175
},
{
"epoch": 15.91,
"eval_loss": 2.428696393966675,
"eval_runtime": 0.7229,
"eval_samples_per_second": 8.3,
"eval_steps_per_second": 1.383,
"step": 175
},
{
"epoch": 16.36,
"learning_rate": 7e-06,
"loss": 2.3876,
"step": 180
},
{
"epoch": 16.36,
"eval_loss": 2.422900915145874,
"eval_runtime": 0.7211,
"eval_samples_per_second": 8.321,
"eval_steps_per_second": 1.387,
"step": 180
},
{
"epoch": 16.82,
"learning_rate": 7e-06,
"loss": 2.3212,
"step": 185
},
{
"epoch": 16.82,
"eval_loss": 2.4173238277435303,
"eval_runtime": 0.72,
"eval_samples_per_second": 8.334,
"eval_steps_per_second": 1.389,
"step": 185
},
{
"epoch": 17.27,
"learning_rate": 7e-06,
"loss": 2.3173,
"step": 190
},
{
"epoch": 17.27,
"eval_loss": 2.4116928577423096,
"eval_runtime": 0.7204,
"eval_samples_per_second": 8.329,
"eval_steps_per_second": 1.388,
"step": 190
},
{
"epoch": 17.73,
"learning_rate": 7e-06,
"loss": 2.3327,
"step": 195
},
{
"epoch": 17.73,
"eval_loss": 2.4064242839813232,
"eval_runtime": 0.7198,
"eval_samples_per_second": 8.335,
"eval_steps_per_second": 1.389,
"step": 195
},
{
"epoch": 18.18,
"learning_rate": 7e-06,
"loss": 2.5256,
"step": 200
},
{
"epoch": 18.18,
"eval_loss": 2.4015276432037354,
"eval_runtime": 0.7429,
"eval_samples_per_second": 8.077,
"eval_steps_per_second": 1.346,
"step": 200
},
{
"epoch": 18.64,
"learning_rate": 7e-06,
"loss": 2.2825,
"step": 205
},
{
"epoch": 18.64,
"eval_loss": 2.3970677852630615,
"eval_runtime": 0.7175,
"eval_samples_per_second": 8.363,
"eval_steps_per_second": 1.394,
"step": 205
},
{
"epoch": 19.09,
"learning_rate": 7e-06,
"loss": 2.3417,
"step": 210
},
{
"epoch": 19.09,
"eval_loss": 2.3925275802612305,
"eval_runtime": 0.7168,
"eval_samples_per_second": 8.37,
"eval_steps_per_second": 1.395,
"step": 210
},
{
"epoch": 19.55,
"learning_rate": 7e-06,
"loss": 2.2123,
"step": 215
},
{
"epoch": 19.55,
"eval_loss": 2.388213872909546,
"eval_runtime": 0.7288,
"eval_samples_per_second": 8.232,
"eval_steps_per_second": 1.372,
"step": 215
},
{
"epoch": 20.0,
"learning_rate": 7e-06,
"loss": 2.3696,
"step": 220
},
{
"epoch": 20.0,
"eval_loss": 2.384182929992676,
"eval_runtime": 0.7182,
"eval_samples_per_second": 8.354,
"eval_steps_per_second": 1.392,
"step": 220
},
{
"epoch": 20.45,
"learning_rate": 7e-06,
"loss": 2.4359,
"step": 225
},
{
"epoch": 20.45,
"eval_loss": 2.380094528198242,
"eval_runtime": 0.718,
"eval_samples_per_second": 8.357,
"eval_steps_per_second": 1.393,
"step": 225
},
{
"epoch": 20.91,
"learning_rate": 7e-06,
"loss": 2.1279,
"step": 230
},
{
"epoch": 20.91,
"eval_loss": 2.3762617111206055,
"eval_runtime": 0.7166,
"eval_samples_per_second": 8.372,
"eval_steps_per_second": 1.395,
"step": 230
},
{
"epoch": 21.36,
"learning_rate": 7e-06,
"loss": 2.3891,
"step": 235
},
{
"epoch": 21.36,
"eval_loss": 2.3726041316986084,
"eval_runtime": 0.7167,
"eval_samples_per_second": 8.371,
"eval_steps_per_second": 1.395,
"step": 235
},
{
"epoch": 21.82,
"learning_rate": 7e-06,
"loss": 2.1994,
"step": 240
},
{
"epoch": 21.82,
"eval_loss": 2.36929988861084,
"eval_runtime": 0.7208,
"eval_samples_per_second": 8.324,
"eval_steps_per_second": 1.387,
"step": 240
},
{
"epoch": 22.27,
"learning_rate": 7e-06,
"loss": 2.2041,
"step": 245
},
{
"epoch": 22.27,
"eval_loss": 2.3658368587493896,
"eval_runtime": 0.7161,
"eval_samples_per_second": 8.379,
"eval_steps_per_second": 1.396,
"step": 245
},
{
"epoch": 22.73,
"learning_rate": 7e-06,
"loss": 2.3645,
"step": 250
},
{
"epoch": 22.73,
"eval_loss": 2.362556219100952,
"eval_runtime": 0.7162,
"eval_samples_per_second": 8.378,
"eval_steps_per_second": 1.396,
"step": 250
},
{
"epoch": 23.18,
"learning_rate": 7e-06,
"loss": 2.2448,
"step": 255
},
{
"epoch": 23.18,
"eval_loss": 2.359351873397827,
"eval_runtime": 0.7198,
"eval_samples_per_second": 8.336,
"eval_steps_per_second": 1.389,
"step": 255
},
{
"epoch": 23.64,
"learning_rate": 7e-06,
"loss": 2.1683,
"step": 260
},
{
"epoch": 23.64,
"eval_loss": 2.3562166690826416,
"eval_runtime": 0.7185,
"eval_samples_per_second": 8.351,
"eval_steps_per_second": 1.392,
"step": 260
},
{
"epoch": 24.09,
"learning_rate": 7e-06,
"loss": 2.34,
"step": 265
},
{
"epoch": 24.09,
"eval_loss": 2.352797746658325,
"eval_runtime": 0.717,
"eval_samples_per_second": 8.368,
"eval_steps_per_second": 1.395,
"step": 265
},
{
"epoch": 24.55,
"learning_rate": 7e-06,
"loss": 2.2091,
"step": 270
},
{
"epoch": 24.55,
"eval_loss": 2.349076271057129,
"eval_runtime": 0.7168,
"eval_samples_per_second": 8.371,
"eval_steps_per_second": 1.395,
"step": 270
},
{
"epoch": 25.0,
"learning_rate": 7e-06,
"loss": 2.2733,
"step": 275
},
{
"epoch": 25.0,
"eval_loss": 2.3457562923431396,
"eval_runtime": 0.716,
"eval_samples_per_second": 8.379,
"eval_steps_per_second": 1.397,
"step": 275
},
{
"epoch": 25.45,
"learning_rate": 7e-06,
"loss": 2.3123,
"step": 280
},
{
"epoch": 25.45,
"eval_loss": 2.3424742221832275,
"eval_runtime": 0.7162,
"eval_samples_per_second": 8.378,
"eval_steps_per_second": 1.396,
"step": 280
},
{
"epoch": 25.91,
"learning_rate": 7e-06,
"loss": 2.1236,
"step": 285
},
{
"epoch": 25.91,
"eval_loss": 2.3390562534332275,
"eval_runtime": 0.7167,
"eval_samples_per_second": 8.372,
"eval_steps_per_second": 1.395,
"step": 285
},
{
"epoch": 26.36,
"learning_rate": 7e-06,
"loss": 2.328,
"step": 290
},
{
"epoch": 26.36,
"eval_loss": 2.3360955715179443,
"eval_runtime": 0.732,
"eval_samples_per_second": 8.197,
"eval_steps_per_second": 1.366,
"step": 290
},
{
"epoch": 26.82,
"learning_rate": 7e-06,
"loss": 2.117,
"step": 295
},
{
"epoch": 26.82,
"eval_loss": 2.333092451095581,
"eval_runtime": 0.7171,
"eval_samples_per_second": 8.367,
"eval_steps_per_second": 1.395,
"step": 295
},
{
"epoch": 27.27,
"learning_rate": 7e-06,
"loss": 2.2671,
"step": 300
},
{
"epoch": 27.27,
"eval_loss": 2.330547571182251,
"eval_runtime": 0.7166,
"eval_samples_per_second": 8.373,
"eval_steps_per_second": 1.395,
"step": 300
}
],
"max_steps": 550,
"num_train_epochs": 50,
"total_flos": 116301968424960.0,
"trial_name": null,
"trial_params": null
}