FossilBERT / trainer_state.json
ljhemmi's picture
Upload 13 files
553b0df
{
"best_metric": 0.9909420289855073,
"best_model_checkpoint": "./fossilBERT_output/checkpoint-5661",
"epoch": 10.0,
"global_step": 6290,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.79,
"learning_rate": 4.6025437201907795e-05,
"loss": 0.1219,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.9829025844930418,
"eval_f1": 0.980708838043966,
"eval_loss": 0.06422444432973862,
"eval_precision": 0.9741532976827094,
"eval_recall": 0.987353206865402,
"eval_runtime": 2.8563,
"eval_samples_per_second": 880.496,
"eval_steps_per_second": 55.315,
"step": 629
},
{
"epoch": 1.59,
"learning_rate": 4.205087440381558e-05,
"loss": 0.0426,
"step": 1000
},
{
"epoch": 2.0,
"eval_accuracy": 0.9864811133200795,
"eval_f1": 0.9845313921747042,
"eval_loss": 0.06980240345001221,
"eval_precision": 0.9917506874427131,
"eval_recall": 0.9774164408310749,
"eval_runtime": 2.8485,
"eval_samples_per_second": 882.932,
"eval_steps_per_second": 55.469,
"step": 1258
},
{
"epoch": 2.38,
"learning_rate": 3.8076311605723374e-05,
"loss": 0.0288,
"step": 1500
},
{
"epoch": 3.0,
"eval_accuracy": 0.9876739562624255,
"eval_f1": 0.986054880791723,
"eval_loss": 0.08222242444753647,
"eval_precision": 0.982078853046595,
"eval_recall": 0.990063233965673,
"eval_runtime": 2.8512,
"eval_samples_per_second": 882.071,
"eval_steps_per_second": 55.414,
"step": 1887
},
{
"epoch": 3.18,
"learning_rate": 3.410174880763116e-05,
"loss": 0.0057,
"step": 2000
},
{
"epoch": 3.97,
"learning_rate": 3.0127186009538953e-05,
"loss": 0.0084,
"step": 2500
},
{
"epoch": 4.0,
"eval_accuracy": 0.9880715705765407,
"eval_f1": 0.9864253393665158,
"eval_loss": 0.07759582996368408,
"eval_precision": 0.9882139619220308,
"eval_recall": 0.9846431797651309,
"eval_runtime": 2.839,
"eval_samples_per_second": 885.877,
"eval_steps_per_second": 55.654,
"step": 2516
},
{
"epoch": 4.77,
"learning_rate": 2.6152623211446743e-05,
"loss": 0.0061,
"step": 3000
},
{
"epoch": 5.0,
"eval_accuracy": 0.988469184890656,
"eval_f1": 0.9868599909379248,
"eval_loss": 0.10027419775724411,
"eval_precision": 0.99,
"eval_recall": 0.983739837398374,
"eval_runtime": 2.8617,
"eval_samples_per_second": 878.859,
"eval_steps_per_second": 55.213,
"step": 3145
},
{
"epoch": 5.56,
"learning_rate": 2.2178060413354532e-05,
"loss": 0.0016,
"step": 3500
},
{
"epoch": 6.0,
"eval_accuracy": 0.989662027833002,
"eval_f1": 0.9882777276825968,
"eval_loss": 0.08881077915430069,
"eval_precision": 0.9864986498649865,
"eval_recall": 0.990063233965673,
"eval_runtime": 2.8662,
"eval_samples_per_second": 877.455,
"eval_steps_per_second": 55.124,
"step": 3774
},
{
"epoch": 6.36,
"learning_rate": 1.820349761526232e-05,
"loss": 0.0023,
"step": 4000
},
{
"epoch": 7.0,
"eval_accuracy": 0.9904572564612326,
"eval_f1": 0.9891500904159132,
"eval_loss": 0.09042701870203018,
"eval_precision": 0.9900452488687783,
"eval_recall": 0.988256549232159,
"eval_runtime": 2.8476,
"eval_samples_per_second": 883.214,
"eval_steps_per_second": 55.486,
"step": 4403
},
{
"epoch": 7.15,
"learning_rate": 1.4228934817170111e-05,
"loss": 0.0037,
"step": 4500
},
{
"epoch": 7.95,
"learning_rate": 1.02543720190779e-05,
"loss": 0.0017,
"step": 5000
},
{
"epoch": 8.0,
"eval_accuracy": 0.9888667992047714,
"eval_f1": 0.9872611464968153,
"eval_loss": 0.09531795978546143,
"eval_precision": 0.9945004582951421,
"eval_recall": 0.980126467931346,
"eval_runtime": 2.8577,
"eval_samples_per_second": 880.078,
"eval_steps_per_second": 55.289,
"step": 5032
},
{
"epoch": 8.74,
"learning_rate": 6.279809220985691e-06,
"loss": 0.0,
"step": 5500
},
{
"epoch": 9.0,
"eval_accuracy": 0.9920477137176938,
"eval_f1": 0.9909420289855073,
"eval_loss": 0.0791691467165947,
"eval_precision": 0.9936421435059037,
"eval_recall": 0.988256549232159,
"eval_runtime": 2.8459,
"eval_samples_per_second": 883.726,
"eval_steps_per_second": 55.518,
"step": 5661
},
{
"epoch": 9.54,
"learning_rate": 2.3052464228934817e-06,
"loss": 0.0,
"step": 6000
},
{
"epoch": 10.0,
"eval_accuracy": 0.9916500994035785,
"eval_f1": 0.990493435943866,
"eval_loss": 0.07801677286624908,
"eval_precision": 0.9927404718693285,
"eval_recall": 0.988256549232159,
"eval_runtime": 2.8772,
"eval_samples_per_second": 874.116,
"eval_steps_per_second": 54.915,
"step": 6290
}
],
"max_steps": 6290,
"num_train_epochs": 10,
"total_flos": 3330561570708480.0,
"trial_name": null,
"trial_params": null
}