amazon-reviews-input-output-6.7b / trainer_state.json
AlekseyKorshuk's picture
End of training
374c50e
raw
history blame
11.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"global_step": 32,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 0,
"loss": 2.9912,
"step": 1
},
{
"epoch": 0.06,
"eval_accuracy": 0.040365853658536587,
"eval_loss": 2.744140625,
"eval_runtime": 4.8585,
"eval_samples_per_second": 20.583,
"eval_steps_per_second": 0.412,
"step": 1
},
{
"epoch": 0.12,
"learning_rate": 0,
"loss": 2.9329,
"step": 2
},
{
"epoch": 0.12,
"eval_accuracy": 0.040365853658536587,
"eval_loss": 2.744140625,
"eval_runtime": 6.3845,
"eval_samples_per_second": 15.663,
"eval_steps_per_second": 0.313,
"step": 2
},
{
"epoch": 0.19,
"learning_rate": 0.0,
"loss": 2.9138,
"step": 3
},
{
"epoch": 0.19,
"eval_accuracy": 0.038902439024390244,
"eval_loss": 2.826171875,
"eval_runtime": 5.4611,
"eval_samples_per_second": 18.311,
"eval_steps_per_second": 0.366,
"step": 3
},
{
"epoch": 0.25,
"learning_rate": 5e-05,
"loss": 2.9395,
"step": 4
},
{
"epoch": 0.25,
"eval_accuracy": 0.038902439024390244,
"eval_loss": 2.826171875,
"eval_runtime": 6.1625,
"eval_samples_per_second": 16.227,
"eval_steps_per_second": 0.325,
"step": 4
},
{
"epoch": 0.31,
"learning_rate": 5e-05,
"loss": 2.9109,
"step": 5
},
{
"epoch": 0.31,
"eval_accuracy": 0.03985772357723577,
"eval_loss": 2.794921875,
"eval_runtime": 6.2368,
"eval_samples_per_second": 16.034,
"eval_steps_per_second": 0.321,
"step": 5
},
{
"epoch": 0.38,
"learning_rate": 5e-05,
"loss": 2.8394,
"step": 6
},
{
"epoch": 0.38,
"eval_accuracy": 0.04030487804878049,
"eval_loss": 2.74609375,
"eval_runtime": 6.1734,
"eval_samples_per_second": 16.198,
"eval_steps_per_second": 0.324,
"step": 6
},
{
"epoch": 0.44,
"learning_rate": 5e-05,
"loss": 2.9365,
"step": 7
},
{
"epoch": 0.44,
"eval_accuracy": 0.03985772357723577,
"eval_loss": 2.720703125,
"eval_runtime": 6.2486,
"eval_samples_per_second": 16.004,
"eval_steps_per_second": 0.32,
"step": 7
},
{
"epoch": 0.5,
"learning_rate": 5e-05,
"loss": 2.7588,
"step": 8
},
{
"epoch": 0.5,
"eval_accuracy": 0.040325203252032524,
"eval_loss": 2.70703125,
"eval_runtime": 6.0675,
"eval_samples_per_second": 16.481,
"eval_steps_per_second": 0.33,
"step": 8
},
{
"epoch": 0.56,
"learning_rate": 5e-05,
"loss": 2.9751,
"step": 9
},
{
"epoch": 0.56,
"eval_accuracy": 0.04073170731707317,
"eval_loss": 2.681640625,
"eval_runtime": 6.0505,
"eval_samples_per_second": 16.528,
"eval_steps_per_second": 0.331,
"step": 9
},
{
"epoch": 0.62,
"learning_rate": 5e-05,
"loss": 2.844,
"step": 10
},
{
"epoch": 0.62,
"eval_accuracy": 0.040426829268292684,
"eval_loss": 2.673828125,
"eval_runtime": 5.8947,
"eval_samples_per_second": 16.964,
"eval_steps_per_second": 0.339,
"step": 10
},
{
"epoch": 0.69,
"learning_rate": 5e-05,
"loss": 2.731,
"step": 11
},
{
"epoch": 0.69,
"eval_accuracy": 0.04056910569105691,
"eval_loss": 2.66796875,
"eval_runtime": 5.7332,
"eval_samples_per_second": 17.442,
"eval_steps_per_second": 0.349,
"step": 11
},
{
"epoch": 0.75,
"learning_rate": 5e-05,
"loss": 2.7434,
"step": 12
},
{
"epoch": 0.75,
"eval_accuracy": 0.04040650406504065,
"eval_loss": 2.669921875,
"eval_runtime": 5.6141,
"eval_samples_per_second": 17.812,
"eval_steps_per_second": 0.356,
"step": 12
},
{
"epoch": 0.81,
"learning_rate": 5e-05,
"loss": 2.9043,
"step": 13
},
{
"epoch": 0.81,
"eval_accuracy": 0.039979674796747966,
"eval_loss": 2.685546875,
"eval_runtime": 5.112,
"eval_samples_per_second": 19.562,
"eval_steps_per_second": 0.391,
"step": 13
},
{
"epoch": 0.88,
"learning_rate": 5e-05,
"loss": 2.8564,
"step": 14
},
{
"epoch": 0.88,
"eval_accuracy": 0.039979674796747966,
"eval_loss": 2.685546875,
"eval_runtime": 6.1791,
"eval_samples_per_second": 16.184,
"eval_steps_per_second": 0.324,
"step": 14
},
{
"epoch": 0.94,
"learning_rate": 5e-05,
"loss": 2.8716,
"step": 15
},
{
"epoch": 0.94,
"eval_accuracy": 0.039979674796747966,
"eval_loss": 2.685546875,
"eval_runtime": 6.2221,
"eval_samples_per_second": 16.072,
"eval_steps_per_second": 0.321,
"step": 15
},
{
"epoch": 1.0,
"learning_rate": 5e-05,
"loss": 2.896,
"step": 16
},
{
"epoch": 1.0,
"eval_accuracy": 0.03975609756097561,
"eval_loss": 2.6953125,
"eval_runtime": 5.7373,
"eval_samples_per_second": 17.43,
"eval_steps_per_second": 0.349,
"step": 16
},
{
"epoch": 1.06,
"learning_rate": 5e-05,
"loss": 1.9858,
"step": 17
},
{
"epoch": 1.06,
"eval_accuracy": 0.039979674796747966,
"eval_loss": 2.70703125,
"eval_runtime": 5.5135,
"eval_samples_per_second": 18.137,
"eval_steps_per_second": 0.363,
"step": 17
},
{
"epoch": 1.12,
"learning_rate": 5e-05,
"loss": 2.0563,
"step": 18
},
{
"epoch": 1.12,
"eval_accuracy": 0.040020325203252036,
"eval_loss": 2.728515625,
"eval_runtime": 6.3828,
"eval_samples_per_second": 15.667,
"eval_steps_per_second": 0.313,
"step": 18
},
{
"epoch": 1.19,
"learning_rate": 5e-05,
"loss": 2.04,
"step": 19
},
{
"epoch": 1.19,
"eval_accuracy": 0.03981707317073171,
"eval_loss": 2.767578125,
"eval_runtime": 5.4095,
"eval_samples_per_second": 18.486,
"eval_steps_per_second": 0.37,
"step": 19
},
{
"epoch": 1.25,
"learning_rate": 5e-05,
"loss": 1.9885,
"step": 20
},
{
"epoch": 1.25,
"eval_accuracy": 0.03955284552845528,
"eval_loss": 2.791015625,
"eval_runtime": 5.4272,
"eval_samples_per_second": 18.426,
"eval_steps_per_second": 0.369,
"step": 20
},
{
"epoch": 1.31,
"learning_rate": 5e-05,
"loss": 2.09,
"step": 21
},
{
"epoch": 1.31,
"eval_accuracy": 0.03930894308943089,
"eval_loss": 2.796875,
"eval_runtime": 6.3475,
"eval_samples_per_second": 15.754,
"eval_steps_per_second": 0.315,
"step": 21
},
{
"epoch": 1.38,
"learning_rate": 5e-05,
"loss": 2.059,
"step": 22
},
{
"epoch": 1.38,
"eval_accuracy": 0.03951219512195122,
"eval_loss": 2.810546875,
"eval_runtime": 6.0188,
"eval_samples_per_second": 16.615,
"eval_steps_per_second": 0.332,
"step": 22
},
{
"epoch": 1.44,
"learning_rate": 5e-05,
"loss": 2.0498,
"step": 23
},
{
"epoch": 1.44,
"eval_accuracy": 0.03975609756097561,
"eval_loss": 2.79296875,
"eval_runtime": 5.4742,
"eval_samples_per_second": 18.268,
"eval_steps_per_second": 0.365,
"step": 23
},
{
"epoch": 1.5,
"learning_rate": 5e-05,
"loss": 1.9568,
"step": 24
},
{
"epoch": 1.5,
"eval_accuracy": 0.04008130081300813,
"eval_loss": 2.791015625,
"eval_runtime": 5.9198,
"eval_samples_per_second": 16.892,
"eval_steps_per_second": 0.338,
"step": 24
},
{
"epoch": 1.56,
"learning_rate": 5e-05,
"loss": 2.1418,
"step": 25
},
{
"epoch": 1.56,
"eval_accuracy": 0.039776422764227645,
"eval_loss": 2.79296875,
"eval_runtime": 5.5505,
"eval_samples_per_second": 18.016,
"eval_steps_per_second": 0.36,
"step": 25
},
{
"epoch": 1.62,
"learning_rate": 5e-05,
"loss": 1.975,
"step": 26
},
{
"epoch": 1.62,
"eval_accuracy": 0.03967479674796748,
"eval_loss": 2.79296875,
"eval_runtime": 5.108,
"eval_samples_per_second": 19.577,
"eval_steps_per_second": 0.392,
"step": 26
},
{
"epoch": 1.69,
"learning_rate": 5e-05,
"loss": 1.996,
"step": 27
},
{
"epoch": 1.69,
"eval_accuracy": 0.03932926829268293,
"eval_loss": 2.794921875,
"eval_runtime": 5.1867,
"eval_samples_per_second": 19.28,
"eval_steps_per_second": 0.386,
"step": 27
},
{
"epoch": 1.75,
"learning_rate": 5e-05,
"loss": 1.9617,
"step": 28
},
{
"epoch": 1.75,
"eval_accuracy": 0.03916666666666667,
"eval_loss": 2.8046875,
"eval_runtime": 5.8792,
"eval_samples_per_second": 17.009,
"eval_steps_per_second": 0.34,
"step": 28
},
{
"epoch": 1.81,
"learning_rate": 5e-05,
"loss": 2.2062,
"step": 29
},
{
"epoch": 1.81,
"eval_accuracy": 0.038760162601626014,
"eval_loss": 2.814453125,
"eval_runtime": 5.1614,
"eval_samples_per_second": 19.375,
"eval_steps_per_second": 0.387,
"step": 29
},
{
"epoch": 1.88,
"learning_rate": 5e-05,
"loss": 1.9929,
"step": 30
},
{
"epoch": 1.88,
"eval_accuracy": 0.038597560975609756,
"eval_loss": 2.814453125,
"eval_runtime": 5.7986,
"eval_samples_per_second": 17.245,
"eval_steps_per_second": 0.345,
"step": 30
},
{
"epoch": 1.94,
"learning_rate": 5e-05,
"loss": 1.9235,
"step": 31
},
{
"epoch": 1.94,
"eval_accuracy": 0.039004065040650404,
"eval_loss": 2.828125,
"eval_runtime": 6.2602,
"eval_samples_per_second": 15.974,
"eval_steps_per_second": 0.319,
"step": 31
},
{
"epoch": 2.0,
"learning_rate": 5e-05,
"loss": 1.9127,
"step": 32
},
{
"epoch": 2.0,
"eval_accuracy": 0.03882113821138211,
"eval_loss": 2.857421875,
"eval_runtime": 5.5269,
"eval_samples_per_second": 18.093,
"eval_steps_per_second": 0.362,
"step": 32
},
{
"epoch": 2.0,
"step": 32,
"total_flos": 10341801000960.0,
"train_loss": 2.4493942260742188,
"train_runtime": 515.1457,
"train_samples_per_second": 3.882,
"train_steps_per_second": 0.062
}
],
"max_steps": 32,
"num_train_epochs": 2,
"total_flos": 10341801000960.0,
"trial_name": null,
"trial_params": null
}