smolm-autoreg-bpe-seed_555 / trainer_state.json
kanishka's picture
End of training
67d5a5d
raw
history blame
4.87 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 8260,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.61,
"learning_rate": 6.25e-05,
"loss": 5.8796,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.4610925470362255,
"eval_loss": 3.108325958251953,
"eval_runtime": 4.3048,
"eval_samples_per_second": 1170.326,
"eval_steps_per_second": 2.323,
"step": 826
},
{
"epoch": 1.21,
"learning_rate": 0.000125,
"loss": 3.1165,
"step": 1000
},
{
"epoch": 1.82,
"learning_rate": 0.0001875,
"loss": 2.802,
"step": 1500
},
{
"epoch": 2.0,
"eval_accuracy": 0.49653186960204804,
"eval_loss": 2.7454917430877686,
"eval_runtime": 4.0976,
"eval_samples_per_second": 1229.489,
"eval_steps_per_second": 2.44,
"step": 1652
},
{
"epoch": 2.42,
"learning_rate": 0.00025,
"loss": 2.6268,
"step": 2000
},
{
"epoch": 3.0,
"eval_accuracy": 0.5115797107338558,
"eval_loss": 2.573380947113037,
"eval_runtime": 4.4435,
"eval_samples_per_second": 1133.781,
"eval_steps_per_second": 2.25,
"step": 2478
},
{
"epoch": 3.03,
"learning_rate": 0.0003125,
"loss": 2.5157,
"step": 2500
},
{
"epoch": 3.63,
"learning_rate": 0.000375,
"loss": 2.4165,
"step": 3000
},
{
"epoch": 4.0,
"eval_accuracy": 0.5211369966209564,
"eval_loss": 2.4666714668273926,
"eval_runtime": 4.525,
"eval_samples_per_second": 1113.377,
"eval_steps_per_second": 2.21,
"step": 3304
},
{
"epoch": 4.24,
"learning_rate": 0.00043750000000000006,
"loss": 2.3502,
"step": 3500
},
{
"epoch": 4.84,
"learning_rate": 0.0005,
"loss": 2.2892,
"step": 4000
},
{
"epoch": 5.0,
"eval_accuracy": 0.5287937658050783,
"eval_loss": 2.394850969314575,
"eval_runtime": 4.6178,
"eval_samples_per_second": 1090.991,
"eval_steps_per_second": 2.166,
"step": 4130
},
{
"epoch": 5.45,
"learning_rate": 0.0005625000000000001,
"loss": 2.2315,
"step": 4500
},
{
"epoch": 6.0,
"eval_accuracy": 0.5337701187510354,
"eval_loss": 2.344557523727417,
"eval_runtime": 4.6005,
"eval_samples_per_second": 1095.088,
"eval_steps_per_second": 2.174,
"step": 4956
},
{
"epoch": 6.05,
"learning_rate": 0.000625,
"loss": 2.2096,
"step": 5000
},
{
"epoch": 6.66,
"learning_rate": 0.0006875,
"loss": 2.1587,
"step": 5500
},
{
"epoch": 7.0,
"eval_accuracy": 0.5373570314429236,
"eval_loss": 2.3208389282226562,
"eval_runtime": 4.4883,
"eval_samples_per_second": 1122.477,
"eval_steps_per_second": 2.228,
"step": 5782
},
{
"epoch": 7.26,
"learning_rate": 0.00075,
"loss": 2.139,
"step": 6000
},
{
"epoch": 7.87,
"learning_rate": 0.0008125,
"loss": 2.1253,
"step": 6500
},
{
"epoch": 8.0,
"eval_accuracy": 0.5394279069622053,
"eval_loss": 2.3043758869171143,
"eval_runtime": 4.5793,
"eval_samples_per_second": 1100.17,
"eval_steps_per_second": 2.184,
"step": 6608
},
{
"epoch": 8.47,
"learning_rate": 0.0008750000000000001,
"loss": 2.0858,
"step": 7000
},
{
"epoch": 9.0,
"eval_accuracy": 0.5403687877641734,
"eval_loss": 2.2939975261688232,
"eval_runtime": 4.5389,
"eval_samples_per_second": 1109.953,
"eval_steps_per_second": 2.203,
"step": 7434
},
{
"epoch": 9.08,
"learning_rate": 0.0009375,
"loss": 2.0892,
"step": 7500
},
{
"epoch": 9.69,
"learning_rate": 0.001,
"loss": 2.0556,
"step": 8000
},
{
"epoch": 10.0,
"eval_accuracy": 0.5416644525230297,
"eval_loss": 2.2877631187438965,
"eval_runtime": 4.6002,
"eval_samples_per_second": 1095.162,
"eval_steps_per_second": 2.174,
"step": 8260
},
{
"epoch": 10.0,
"step": 8260,
"total_flos": 2562796651806720.0,
"train_loss": 2.5528629182903297,
"train_runtime": 747.7642,
"train_samples_per_second": 706.265,
"train_steps_per_second": 11.046
}
],
"logging_steps": 500,
"max_steps": 8260,
"num_train_epochs": 10,
"save_steps": 2000,
"total_flos": 2562796651806720.0,
"trial_name": null,
"trial_params": null
}