xcd_classifier / checkpoint-1026 /trainer_state.json
Zmu's picture
Upload folder using huggingface_hub
b021bb0
raw
history blame
8.93 kB
{
"best_metric": 0.028854751959443092,
"best_model_checkpoint": "/tmp/model/checkpoint-1026",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1026,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 7.281553398058253e-06,
"loss": 2.6614,
"step": 17
},
{
"epoch": 0.1,
"learning_rate": 1.5048543689320387e-05,
"loss": 1.2662,
"step": 34
},
{
"epoch": 0.15,
"learning_rate": 2.330097087378641e-05,
"loss": 0.5076,
"step": 51
},
{
"epoch": 0.2,
"learning_rate": 3.155339805825243e-05,
"loss": 0.4482,
"step": 68
},
{
"epoch": 0.25,
"learning_rate": 3.980582524271845e-05,
"loss": 0.3258,
"step": 85
},
{
"epoch": 0.3,
"learning_rate": 4.805825242718447e-05,
"loss": 0.2473,
"step": 102
},
{
"epoch": 0.35,
"learning_rate": 4.929577464788733e-05,
"loss": 0.3911,
"step": 119
},
{
"epoch": 0.4,
"learning_rate": 4.837486457204768e-05,
"loss": 0.4416,
"step": 136
},
{
"epoch": 0.45,
"learning_rate": 4.745395449620802e-05,
"loss": 0.4884,
"step": 153
},
{
"epoch": 0.5,
"learning_rate": 4.653304442036837e-05,
"loss": 0.2937,
"step": 170
},
{
"epoch": 0.55,
"learning_rate": 4.561213434452871e-05,
"loss": 0.3657,
"step": 187
},
{
"epoch": 0.6,
"learning_rate": 4.469122426868906e-05,
"loss": 0.1776,
"step": 204
},
{
"epoch": 0.65,
"learning_rate": 4.377031419284941e-05,
"loss": 0.3519,
"step": 221
},
{
"epoch": 0.7,
"learning_rate": 4.284940411700975e-05,
"loss": 0.3079,
"step": 238
},
{
"epoch": 0.75,
"learning_rate": 4.1928494041170105e-05,
"loss": 0.2125,
"step": 255
},
{
"epoch": 0.8,
"learning_rate": 4.1007583965330447e-05,
"loss": 0.1151,
"step": 272
},
{
"epoch": 0.85,
"learning_rate": 4.0086673889490795e-05,
"loss": 0.3185,
"step": 289
},
{
"epoch": 0.89,
"learning_rate": 3.916576381365114e-05,
"loss": 0.1969,
"step": 306
},
{
"epoch": 0.94,
"learning_rate": 3.8244853737811486e-05,
"loss": 0.2635,
"step": 323
},
{
"epoch": 0.99,
"learning_rate": 3.7323943661971835e-05,
"loss": 0.2407,
"step": 340
},
{
"epoch": 1.0,
"eval_accuracy": 0.9633967789165446,
"eval_auc": 0.9964920446122287,
"eval_f1": 0.9614791987673343,
"eval_loss": 0.10120841860771179,
"eval_precision": 0.9397590361445783,
"eval_recall": 0.9842271293375394,
"eval_runtime": 12.6432,
"eval_samples_per_second": 54.021,
"eval_steps_per_second": 3.401,
"step": 342
},
{
"epoch": 1.04,
"learning_rate": 3.640303358613218e-05,
"loss": 0.18,
"step": 357
},
{
"epoch": 1.09,
"learning_rate": 3.5482123510292526e-05,
"loss": 0.167,
"step": 374
},
{
"epoch": 1.14,
"learning_rate": 3.4561213434452874e-05,
"loss": 0.2256,
"step": 391
},
{
"epoch": 1.19,
"learning_rate": 3.3640303358613216e-05,
"loss": 0.144,
"step": 408
},
{
"epoch": 1.24,
"learning_rate": 3.2719393282773565e-05,
"loss": 0.108,
"step": 425
},
{
"epoch": 1.29,
"learning_rate": 3.179848320693391e-05,
"loss": 0.2115,
"step": 442
},
{
"epoch": 1.34,
"learning_rate": 3.087757313109426e-05,
"loss": 0.1777,
"step": 459
},
{
"epoch": 1.39,
"learning_rate": 2.9956663055254608e-05,
"loss": 0.2568,
"step": 476
},
{
"epoch": 1.44,
"learning_rate": 2.9035752979414954e-05,
"loss": 0.1353,
"step": 493
},
{
"epoch": 1.49,
"learning_rate": 2.81148429035753e-05,
"loss": 0.1635,
"step": 510
},
{
"epoch": 1.54,
"learning_rate": 2.7193932827735648e-05,
"loss": 0.244,
"step": 527
},
{
"epoch": 1.59,
"learning_rate": 2.6273022751895993e-05,
"loss": 0.1751,
"step": 544
},
{
"epoch": 1.64,
"learning_rate": 2.535211267605634e-05,
"loss": 0.1629,
"step": 561
},
{
"epoch": 1.69,
"learning_rate": 2.4431202600216684e-05,
"loss": 0.2097,
"step": 578
},
{
"epoch": 1.74,
"learning_rate": 2.3510292524377033e-05,
"loss": 0.1262,
"step": 595
},
{
"epoch": 1.79,
"learning_rate": 2.258938244853738e-05,
"loss": 0.0854,
"step": 612
},
{
"epoch": 1.84,
"learning_rate": 2.1668472372697727e-05,
"loss": 0.3253,
"step": 629
},
{
"epoch": 1.89,
"learning_rate": 2.0747562296858072e-05,
"loss": 0.0901,
"step": 646
},
{
"epoch": 1.94,
"learning_rate": 1.9826652221018418e-05,
"loss": 0.0816,
"step": 663
},
{
"epoch": 1.99,
"learning_rate": 1.8905742145178766e-05,
"loss": 0.1621,
"step": 680
},
{
"epoch": 2.0,
"eval_accuracy": 0.9853587115666179,
"eval_auc": 0.9993104755994553,
"eval_f1": 0.9843260188087775,
"eval_loss": 0.039207685738801956,
"eval_precision": 0.9781931464174455,
"eval_recall": 0.9905362776025236,
"eval_runtime": 12.5653,
"eval_samples_per_second": 54.356,
"eval_steps_per_second": 3.422,
"step": 684
},
{
"epoch": 2.04,
"learning_rate": 1.7984832069339115e-05,
"loss": 0.154,
"step": 697
},
{
"epoch": 2.09,
"learning_rate": 1.706392199349946e-05,
"loss": 0.1426,
"step": 714
},
{
"epoch": 2.14,
"learning_rate": 1.6143011917659806e-05,
"loss": 0.1294,
"step": 731
},
{
"epoch": 2.19,
"learning_rate": 1.5222101841820153e-05,
"loss": 0.0665,
"step": 748
},
{
"epoch": 2.24,
"learning_rate": 1.4301191765980498e-05,
"loss": 0.0233,
"step": 765
},
{
"epoch": 2.29,
"learning_rate": 1.3380281690140845e-05,
"loss": 0.0811,
"step": 782
},
{
"epoch": 2.34,
"learning_rate": 1.2459371614301192e-05,
"loss": 0.0786,
"step": 799
},
{
"epoch": 2.39,
"learning_rate": 1.153846153846154e-05,
"loss": 0.1104,
"step": 816
},
{
"epoch": 2.44,
"learning_rate": 1.0617551462621887e-05,
"loss": 0.1425,
"step": 833
},
{
"epoch": 2.49,
"learning_rate": 9.696641386782232e-06,
"loss": 0.0252,
"step": 850
},
{
"epoch": 2.54,
"learning_rate": 8.775731310942579e-06,
"loss": 0.1767,
"step": 867
},
{
"epoch": 2.58,
"learning_rate": 7.90899241603467e-06,
"loss": 0.0504,
"step": 884
},
{
"epoch": 2.63,
"learning_rate": 6.988082340195017e-06,
"loss": 0.1102,
"step": 901
},
{
"epoch": 2.68,
"learning_rate": 6.067172264355364e-06,
"loss": 0.11,
"step": 918
},
{
"epoch": 2.73,
"learning_rate": 5.14626218851571e-06,
"loss": 0.0775,
"step": 935
},
{
"epoch": 2.78,
"learning_rate": 4.225352112676056e-06,
"loss": 0.132,
"step": 952
},
{
"epoch": 2.83,
"learning_rate": 3.304442036836403e-06,
"loss": 0.1459,
"step": 969
},
{
"epoch": 2.88,
"learning_rate": 2.38353196099675e-06,
"loss": 0.0426,
"step": 986
},
{
"epoch": 2.93,
"learning_rate": 1.4626218851570965e-06,
"loss": 0.2774,
"step": 1003
},
{
"epoch": 2.98,
"learning_rate": 5.417118093174432e-07,
"loss": 0.0373,
"step": 1020
},
{
"epoch": 3.0,
"eval_accuracy": 0.9897510980966325,
"eval_auc": 0.9995604281946529,
"eval_f1": 0.9889064976228209,
"eval_loss": 0.028854751959443092,
"eval_precision": 0.9936305732484076,
"eval_recall": 0.9842271293375394,
"eval_runtime": 12.521,
"eval_samples_per_second": 54.548,
"eval_steps_per_second": 3.434,
"step": 1026
}
],
"logging_steps": 17,
"max_steps": 1026,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 6.35124346881196e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}