teapotllm / checkpoint-5580 /trainer_state.json
zakerytclarke's picture
Upload folder using huggingface_hub
78398a7 verified
{
"best_metric": 0.02781366929411888,
"best_model_checkpoint": "./teapotllm/checkpoint-372",
"epoch": 30.0,
"eval_steps": 500,
"global_step": 5580,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"em_boolean": 0.4,
"em_extraction": 0.5,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.0,
"epoch": 1.0,
"mean_em": 0.13636363636363635,
"mean_similarity": 0.7151702378848286,
"mean_word_count_diff": 4.0606060606060606,
"similarity_boolean": 0.6859854146838188,
"similarity_extraction": 0.6776353690773249,
"similarity_qa": 0.7660255491733551,
"similarity_summarization": 0.7428409457206726,
"similarity_unanswerable": 0.7166690183803439,
"word_count_diff_boolean": 1.9,
"word_count_diff_extraction": 3.1,
"word_count_diff_qa": 4.7,
"word_count_diff_summarization": 19.25,
"word_count_diff_unanswerable": 2.9375
},
{
"epoch": 1.0,
"grad_norm": 0.1633770614862442,
"learning_rate": 4.8333333333333334e-05,
"loss": 0.0597,
"step": 186
},
{
"epoch": 1.0,
"eval_loss": 0.02993270382285118,
"eval_runtime": 4.3173,
"eval_samples_per_second": 15.287,
"eval_steps_per_second": 2.085,
"step": 186
},
{
"em_boolean": 0.4,
"em_extraction": 0.4,
"em_qa": 0.2,
"em_summarization": 0.0,
"em_unanswerable": 0.03125,
"epoch": 2.0,
"mean_em": 0.16666666666666666,
"mean_similarity": 0.6823387349193747,
"mean_word_count_diff": 4.393939393939394,
"similarity_boolean": 0.6429030410945415,
"similarity_extraction": 0.5662426881492137,
"similarity_qa": 0.8177247762680053,
"similarity_summarization": 0.6609117537736893,
"similarity_unanswerable": 0.6913126385770738,
"word_count_diff_boolean": 2.0,
"word_count_diff_extraction": 4.5,
"word_count_diff_qa": 4.4,
"word_count_diff_summarization": 18.25,
"word_count_diff_unanswerable": 3.375
},
{
"epoch": 2.0,
"grad_norm": 0.11337712407112122,
"learning_rate": 4.666666666666667e-05,
"loss": 0.0236,
"step": 372
},
{
"epoch": 2.0,
"eval_loss": 0.02781366929411888,
"eval_runtime": 4.3427,
"eval_samples_per_second": 15.198,
"eval_steps_per_second": 2.072,
"step": 372
},
{
"em_boolean": 0.7,
"em_extraction": 0.4,
"em_qa": 0.1,
"em_summarization": 0.0,
"em_unanswerable": 0.03125,
"epoch": 3.0,
"mean_em": 0.19696969696969696,
"mean_similarity": 0.7136092173556486,
"mean_word_count_diff": 4.0606060606060606,
"similarity_boolean": 0.7808200724422931,
"similarity_extraction": 0.5972317367792129,
"similarity_qa": 0.8595586240291595,
"similarity_summarization": 0.624700665473938,
"similarity_unanswerable": 0.6944781672209501,
"word_count_diff_boolean": 1.2,
"word_count_diff_extraction": 5.3,
"word_count_diff_qa": 3.5,
"word_count_diff_summarization": 18.0,
"word_count_diff_unanswerable": 3.0
},
{
"epoch": 3.0,
"grad_norm": 0.3930008113384247,
"learning_rate": 4.5e-05,
"loss": 0.015,
"step": 558
},
{
"epoch": 3.0,
"eval_loss": 0.02964785322546959,
"eval_runtime": 4.3399,
"eval_samples_per_second": 15.208,
"eval_steps_per_second": 2.074,
"step": 558
},
{
"em_boolean": 0.6,
"em_extraction": 0.3,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.09375,
"epoch": 4.0,
"mean_em": 0.18181818181818182,
"mean_similarity": 0.7020406423305924,
"mean_word_count_diff": 4.287878787878788,
"similarity_boolean": 0.7741398263722659,
"similarity_extraction": 0.5548319160938263,
"similarity_qa": 0.8464474320411682,
"similarity_summarization": 0.6931064277887344,
"similarity_unanswerable": 0.681502029299736,
"word_count_diff_boolean": 1.4,
"word_count_diff_extraction": 4.6,
"word_count_diff_qa": 3.8,
"word_count_diff_summarization": 18.75,
"word_count_diff_unanswerable": 3.4375
},
{
"epoch": 4.0,
"grad_norm": 0.2295440137386322,
"learning_rate": 4.3333333333333334e-05,
"loss": 0.0102,
"step": 744
},
{
"epoch": 4.0,
"eval_loss": 0.03305710479617119,
"eval_runtime": 4.326,
"eval_samples_per_second": 15.257,
"eval_steps_per_second": 2.08,
"step": 744
},
{
"em_boolean": 0.6,
"em_extraction": 0.4,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.125,
"epoch": 5.0,
"mean_em": 0.21212121212121213,
"mean_similarity": 0.7231389704075727,
"mean_word_count_diff": 4.363636363636363,
"similarity_boolean": 0.7543415576219559,
"similarity_extraction": 0.5749588221311569,
"similarity_qa": 0.7916842460632324,
"similarity_summarization": 0.8001345098018646,
"similarity_unanswerable": 0.7286496171727777,
"word_count_diff_boolean": 1.6,
"word_count_diff_extraction": 5.9,
"word_count_diff_qa": 6.0,
"word_count_diff_summarization": 15.75,
"word_count_diff_unanswerable": 2.8125
},
{
"epoch": 5.0,
"grad_norm": 0.16206224262714386,
"learning_rate": 4.166666666666667e-05,
"loss": 0.0073,
"step": 930
},
{
"epoch": 5.0,
"eval_loss": 0.03383969888091087,
"eval_runtime": 4.3253,
"eval_samples_per_second": 15.259,
"eval_steps_per_second": 2.081,
"step": 930
},
{
"em_boolean": 0.5,
"em_extraction": 0.4,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.09375,
"epoch": 6.0,
"mean_em": 0.18181818181818182,
"mean_similarity": 0.7190481367887873,
"mean_word_count_diff": 3.712121212121212,
"similarity_boolean": 0.7263102680444717,
"similarity_extraction": 0.5906288996338844,
"similarity_qa": 0.8479191601276398,
"similarity_summarization": 0.7078308612108231,
"similarity_unanswerable": 0.7180396970361471,
"word_count_diff_boolean": 1.6,
"word_count_diff_extraction": 4.1,
"word_count_diff_qa": 3.7,
"word_count_diff_summarization": 18.5,
"word_count_diff_unanswerable": 2.40625
},
{
"epoch": 6.0,
"grad_norm": 0.11092964559793472,
"learning_rate": 4e-05,
"loss": 0.0063,
"step": 1116
},
{
"epoch": 6.0,
"eval_loss": 0.036639757454395294,
"eval_runtime": 4.3411,
"eval_samples_per_second": 15.204,
"eval_steps_per_second": 2.073,
"step": 1116
},
{
"em_boolean": 0.5,
"em_extraction": 0.4,
"em_qa": 0.1,
"em_summarization": 0.0,
"em_unanswerable": 0.09375,
"epoch": 7.0,
"mean_em": 0.19696969696969696,
"mean_similarity": 0.7222502547683138,
"mean_word_count_diff": 3.4696969696969697,
"similarity_boolean": 0.7063896596431732,
"similarity_extraction": 0.613977988064289,
"similarity_qa": 0.8624586880207061,
"similarity_summarization": 0.7022114843130112,
"similarity_unanswerable": 0.7197314850054681,
"word_count_diff_boolean": 0.0,
"word_count_diff_extraction": 3.8,
"word_count_diff_qa": 4.0,
"word_count_diff_summarization": 18.25,
"word_count_diff_unanswerable": 2.4375
},
{
"epoch": 7.0,
"grad_norm": 0.0025342688895761967,
"learning_rate": 3.8333333333333334e-05,
"loss": 0.0051,
"step": 1302
},
{
"epoch": 7.0,
"eval_loss": 0.03565499931573868,
"eval_runtime": 4.3456,
"eval_samples_per_second": 15.188,
"eval_steps_per_second": 2.071,
"step": 1302
},
{
"em_boolean": 0.7,
"em_extraction": 0.5,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.09375,
"epoch": 8.0,
"mean_em": 0.22727272727272727,
"mean_similarity": 0.7595055665482174,
"mean_word_count_diff": 3.6363636363636362,
"similarity_boolean": 0.827492555975914,
"similarity_extraction": 0.6385330930352211,
"similarity_qa": 0.8492773473262787,
"similarity_summarization": 0.7314918637275696,
"similarity_unanswerable": 0.7515115616843104,
"word_count_diff_boolean": 0.0,
"word_count_diff_extraction": 3.0,
"word_count_diff_qa": 4.7,
"word_count_diff_summarization": 17.75,
"word_count_diff_unanswerable": 2.875
},
{
"epoch": 8.0,
"grad_norm": 0.015746578574180603,
"learning_rate": 3.6666666666666666e-05,
"loss": 0.0059,
"step": 1488
},
{
"epoch": 8.0,
"eval_loss": 0.03911704570055008,
"eval_runtime": 4.3337,
"eval_samples_per_second": 15.229,
"eval_steps_per_second": 2.077,
"step": 1488
},
{
"em_boolean": 0.8,
"em_extraction": 0.5,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.1875,
"epoch": 9.0,
"mean_em": 0.2878787878787879,
"mean_similarity": 0.7606699131993633,
"mean_word_count_diff": 3.621212121212121,
"similarity_boolean": 0.8291448082774877,
"similarity_extraction": 0.7344626411795616,
"similarity_qa": 0.8531238198280334,
"similarity_summarization": 0.6869091093540192,
"similarity_unanswerable": 0.7277895356528461,
"word_count_diff_boolean": 1.7,
"word_count_diff_extraction": 2.5,
"word_count_diff_qa": 4.3,
"word_count_diff_summarization": 18.25,
"word_count_diff_unanswerable": 2.53125
},
{
"epoch": 9.0,
"grad_norm": 0.2743629217147827,
"learning_rate": 3.5e-05,
"loss": 0.0032,
"step": 1674
},
{
"epoch": 9.0,
"eval_loss": 0.04249922186136246,
"eval_runtime": 4.3316,
"eval_samples_per_second": 15.237,
"eval_steps_per_second": 2.078,
"step": 1674
},
{
"em_boolean": 0.9,
"em_extraction": 0.5,
"em_qa": 0.1,
"em_summarization": 0.0,
"em_unanswerable": 0.1875,
"epoch": 10.0,
"mean_em": 0.3181818181818182,
"mean_similarity": 0.7545153351895737,
"mean_word_count_diff": 3.5606060606060606,
"similarity_boolean": 0.9258624315261841,
"similarity_extraction": 0.6548894591629505,
"similarity_qa": 0.7920802533626556,
"similarity_summarization": 0.7080870717763901,
"similarity_unanswerable": 0.7261669498402625,
"word_count_diff_boolean": 0.0,
"word_count_diff_extraction": 3.5,
"word_count_diff_qa": 4.3,
"word_count_diff_summarization": 17.75,
"word_count_diff_unanswerable": 2.6875
},
{
"epoch": 10.0,
"grad_norm": 0.19264960289001465,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.0023,
"step": 1860
},
{
"epoch": 10.0,
"eval_loss": 0.044133350253105164,
"eval_runtime": 4.332,
"eval_samples_per_second": 15.236,
"eval_steps_per_second": 2.078,
"step": 1860
},
{
"em_boolean": 0.7,
"em_extraction": 0.5,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.21875,
"epoch": 11.0,
"mean_em": 0.2878787878787879,
"mean_similarity": 0.7554914088863315,
"mean_word_count_diff": 3.4545454545454546,
"similarity_boolean": 0.8498284339904785,
"similarity_extraction": 0.6388141810894012,
"similarity_qa": 0.8062415808439255,
"similarity_summarization": 0.7252775132656097,
"similarity_unanswerable": 0.7503900304436684,
"word_count_diff_boolean": 0.0,
"word_count_diff_extraction": 3.4,
"word_count_diff_qa": 5.3,
"word_count_diff_summarization": 15.25,
"word_count_diff_unanswerable": 2.5
},
{
"epoch": 11.0,
"grad_norm": 0.026405496522784233,
"learning_rate": 3.1666666666666666e-05,
"loss": 0.0018,
"step": 2046
},
{
"epoch": 11.0,
"eval_loss": 0.04769740626215935,
"eval_runtime": 4.3231,
"eval_samples_per_second": 15.267,
"eval_steps_per_second": 2.082,
"step": 2046
},
{
"em_boolean": 0.4,
"em_extraction": 0.5,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.09375,
"epoch": 12.0,
"mean_em": 0.18181818181818182,
"mean_similarity": 0.70488141804482,
"mean_word_count_diff": 3.712121212121212,
"similarity_boolean": 0.6591130249202252,
"similarity_extraction": 0.7003911420702934,
"similarity_qa": 0.7918749868869781,
"similarity_summarization": 0.6370653212070465,
"similarity_unanswerable": 0.7018787739798427,
"word_count_diff_boolean": 1.6,
"word_count_diff_extraction": 3.2,
"word_count_diff_qa": 3.5,
"word_count_diff_summarization": 18.75,
"word_count_diff_unanswerable": 2.71875
},
{
"epoch": 12.0,
"grad_norm": 0.009582425467669964,
"learning_rate": 3e-05,
"loss": 0.0021,
"step": 2232
},
{
"epoch": 12.0,
"eval_loss": 0.04564819857478142,
"eval_runtime": 4.3388,
"eval_samples_per_second": 15.211,
"eval_steps_per_second": 2.074,
"step": 2232
},
{
"em_boolean": 0.6,
"em_extraction": 0.4,
"em_qa": 0.1,
"em_summarization": 0.0,
"em_unanswerable": 0.21875,
"epoch": 13.0,
"mean_em": 0.2727272727272727,
"mean_similarity": 0.732733571506811,
"mean_word_count_diff": 3.5303030303030303,
"similarity_boolean": 0.7299912668764591,
"similarity_extraction": 0.6099361270666123,
"similarity_qa": 0.8338733792304993,
"similarity_summarization": 0.6718393042683601,
"similarity_unanswerable": 0.7479703365825117,
"word_count_diff_boolean": 1.4,
"word_count_diff_extraction": 4.2,
"word_count_diff_qa": 3.3,
"word_count_diff_summarization": 16.75,
"word_count_diff_unanswerable": 2.40625
},
{
"epoch": 13.0,
"grad_norm": 0.015427525155246258,
"learning_rate": 2.8333333333333335e-05,
"loss": 0.006,
"step": 2418
},
{
"epoch": 13.0,
"eval_loss": 0.0508425235748291,
"eval_runtime": 4.3235,
"eval_samples_per_second": 15.265,
"eval_steps_per_second": 2.082,
"step": 2418
},
{
"em_boolean": 0.7,
"em_extraction": 0.5,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.1875,
"epoch": 14.0,
"mean_em": 0.2727272727272727,
"mean_similarity": 0.7405690112800309,
"mean_word_count_diff": 3.4242424242424243,
"similarity_boolean": 0.8725614547729492,
"similarity_extraction": 0.6553262293338775,
"similarity_qa": 0.7748158514499665,
"similarity_summarization": 0.7374982982873917,
"similarity_unanswerable": 0.7156414436176419,
"word_count_diff_boolean": 0.0,
"word_count_diff_extraction": 3.3,
"word_count_diff_qa": 5.1,
"word_count_diff_summarization": 18.0,
"word_count_diff_unanswerable": 2.1875
},
{
"epoch": 14.0,
"grad_norm": 0.02267725020647049,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.0014,
"step": 2604
},
{
"epoch": 14.0,
"eval_loss": 0.0475737489759922,
"eval_runtime": 4.3517,
"eval_samples_per_second": 15.166,
"eval_steps_per_second": 2.068,
"step": 2604
},
{
"em_boolean": 0.5,
"em_extraction": 0.6,
"em_qa": 0.1,
"em_summarization": 0.0,
"em_unanswerable": 0.21875,
"epoch": 15.0,
"mean_em": 0.2878787878787879,
"mean_similarity": 0.7692180305267825,
"mean_word_count_diff": 3.1666666666666665,
"similarity_boolean": 0.7965274572372436,
"similarity_extraction": 0.7734156735241413,
"similarity_qa": 0.8109998881816864,
"similarity_summarization": 0.6901126950979233,
"similarity_unanswerable": 0.7562034076545388,
"word_count_diff_boolean": 0.0,
"word_count_diff_extraction": 1.5,
"word_count_diff_qa": 3.8,
"word_count_diff_summarization": 18.5,
"word_count_diff_unanswerable": 2.5625
},
{
"epoch": 15.0,
"grad_norm": 0.04372488334774971,
"learning_rate": 2.5e-05,
"loss": 0.0014,
"step": 2790
},
{
"epoch": 15.0,
"eval_loss": 0.04670372232794762,
"eval_runtime": 4.3278,
"eval_samples_per_second": 15.25,
"eval_steps_per_second": 2.08,
"step": 2790
},
{
"em_boolean": 0.4,
"em_extraction": 0.7,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.21875,
"epoch": 16.0,
"mean_em": 0.2727272727272727,
"mean_similarity": 0.7683560192923654,
"mean_word_count_diff": 3.5757575757575757,
"similarity_boolean": 0.6552163552492857,
"similarity_extraction": 0.7891500160098076,
"similarity_qa": 0.8301358222961426,
"similarity_summarization": 0.7006907612085342,
"similarity_unanswerable": 0.7863660091534257,
"word_count_diff_boolean": 1.7,
"word_count_diff_extraction": 1.9,
"word_count_diff_qa": 4.9,
"word_count_diff_summarization": 21.25,
"word_count_diff_unanswerable": 2.0625
},
{
"epoch": 16.0,
"grad_norm": 0.017858723178505898,
"learning_rate": 2.3333333333333336e-05,
"loss": 0.0013,
"step": 2976
},
{
"epoch": 16.0,
"eval_loss": 0.0506870336830616,
"eval_runtime": 4.3168,
"eval_samples_per_second": 15.289,
"eval_steps_per_second": 2.085,
"step": 2976
},
{
"em_boolean": 0.4,
"em_extraction": 0.3,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.21875,
"epoch": 17.0,
"mean_em": 0.21212121212121213,
"mean_similarity": 0.7196075666808721,
"mean_word_count_diff": 3.803030303030303,
"similarity_boolean": 0.6327047817409038,
"similarity_extraction": 0.6252625986933709,
"similarity_qa": 0.8158903121948242,
"similarity_summarization": 0.6718292534351349,
"similarity_unanswerable": 0.7521314206533134,
"word_count_diff_boolean": 1.6,
"word_count_diff_extraction": 3.6,
"word_count_diff_qa": 3.9,
"word_count_diff_summarization": 18.25,
"word_count_diff_unanswerable": 2.71875
},
{
"epoch": 17.0,
"grad_norm": 0.0030904498416930437,
"learning_rate": 2.1666666666666667e-05,
"loss": 0.0009,
"step": 3162
},
{
"epoch": 17.0,
"eval_loss": 0.05454666167497635,
"eval_runtime": 4.3302,
"eval_samples_per_second": 15.242,
"eval_steps_per_second": 2.078,
"step": 3162
},
{
"em_boolean": 0.4,
"em_extraction": 0.5,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.1875,
"epoch": 18.0,
"mean_em": 0.22727272727272727,
"mean_similarity": 0.7074560875524626,
"mean_word_count_diff": 3.6818181818181817,
"similarity_boolean": 0.6091547535732389,
"similarity_extraction": 0.6446325138211251,
"similarity_qa": 0.8525063633918762,
"similarity_summarization": 0.6083656400442123,
"similarity_unanswerable": 0.7248657159507275,
"word_count_diff_boolean": 1.7,
"word_count_diff_extraction": 2.6,
"word_count_diff_qa": 4.0,
"word_count_diff_summarization": 18.75,
"word_count_diff_unanswerable": 2.65625
},
{
"epoch": 18.0,
"grad_norm": 0.0558372437953949,
"learning_rate": 2e-05,
"loss": 0.0008,
"step": 3348
},
{
"epoch": 18.0,
"eval_loss": 0.054993122816085815,
"eval_runtime": 4.3426,
"eval_samples_per_second": 15.198,
"eval_steps_per_second": 2.073,
"step": 3348
},
{
"em_boolean": 0.6,
"em_extraction": 0.6,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.21875,
"epoch": 19.0,
"mean_em": 0.2878787878787879,
"mean_similarity": 0.7609088637612083,
"mean_word_count_diff": 3.196969696969697,
"similarity_boolean": 0.7781090468168259,
"similarity_extraction": 0.7332883447408676,
"similarity_qa": 0.7839098423719406,
"similarity_summarization": 0.6715002506971359,
"similarity_unanswerable": 0.7681534895673394,
"word_count_diff_boolean": 0.0,
"word_count_diff_extraction": 1.9,
"word_count_diff_qa": 4.7,
"word_count_diff_summarization": 18.25,
"word_count_diff_unanswerable": 2.25
},
{
"epoch": 19.0,
"grad_norm": 0.2534376084804535,
"learning_rate": 1.8333333333333333e-05,
"loss": 0.001,
"step": 3534
},
{
"epoch": 19.0,
"eval_loss": 0.05550788715481758,
"eval_runtime": 4.3349,
"eval_samples_per_second": 15.225,
"eval_steps_per_second": 2.076,
"step": 3534
},
{
"em_boolean": 0.5,
"em_extraction": 0.5,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.21875,
"epoch": 20.0,
"mean_em": 0.25757575757575757,
"mean_similarity": 0.7586507165070736,
"mean_word_count_diff": 3.5454545454545454,
"similarity_boolean": 0.6818089708685875,
"similarity_extraction": 0.6717077270150185,
"similarity_qa": 0.8592052400112152,
"similarity_summarization": 0.6685295552015305,
"similarity_unanswerable": 0.7896753028035164,
"word_count_diff_boolean": 1.7,
"word_count_diff_extraction": 2.1,
"word_count_diff_qa": 4.4,
"word_count_diff_summarization": 18.0,
"word_count_diff_unanswerable": 2.5
},
{
"epoch": 20.0,
"grad_norm": 0.011831851676106453,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.0008,
"step": 3720
},
{
"epoch": 20.0,
"eval_loss": 0.053514618426561356,
"eval_runtime": 4.3467,
"eval_samples_per_second": 15.184,
"eval_steps_per_second": 2.071,
"step": 3720
},
{
"em_boolean": 0.8,
"em_extraction": 0.5,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.21875,
"epoch": 21.0,
"mean_em": 0.30303030303030304,
"mean_similarity": 0.7693727000644712,
"mean_word_count_diff": 3.212121212121212,
"similarity_boolean": 0.8541430443525314,
"similarity_extraction": 0.7111731946468354,
"similarity_qa": 0.8433568000793457,
"similarity_summarization": 0.538779329508543,
"similarity_unanswerable": 0.7667734529823065,
"word_count_diff_boolean": 0.0,
"word_count_diff_extraction": 1.8,
"word_count_diff_qa": 4.5,
"word_count_diff_summarization": 17.0,
"word_count_diff_unanswerable": 2.53125
},
{
"epoch": 21.0,
"grad_norm": 0.021487198770046234,
"learning_rate": 1.5e-05,
"loss": 0.0007,
"step": 3906
},
{
"epoch": 21.0,
"eval_loss": 0.05830617994070053,
"eval_runtime": 4.3417,
"eval_samples_per_second": 15.201,
"eval_steps_per_second": 2.073,
"step": 3906
},
{
"em_boolean": 0.5,
"em_extraction": 0.3,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.21875,
"epoch": 22.0,
"mean_em": 0.22727272727272727,
"mean_similarity": 0.7141596485267986,
"mean_word_count_diff": 3.5303030303030303,
"similarity_boolean": 0.7537968754768372,
"similarity_extraction": 0.5005216524004936,
"similarity_qa": 0.8255742013454437,
"similarity_summarization": 0.6753722429275513,
"similarity_unanswerable": 0.7385662668384612,
"word_count_diff_boolean": 1.2,
"word_count_diff_extraction": 3.1,
"word_count_diff_qa": 3.6,
"word_count_diff_summarization": 18.75,
"word_count_diff_unanswerable": 2.46875
},
{
"epoch": 22.0,
"grad_norm": 0.001079858047887683,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.0009,
"step": 4092
},
{
"epoch": 22.0,
"eval_loss": 0.0558372400701046,
"eval_runtime": 4.3409,
"eval_samples_per_second": 15.204,
"eval_steps_per_second": 2.073,
"step": 4092
},
{
"em_boolean": 0.5,
"em_extraction": 0.4,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.21875,
"epoch": 23.0,
"mean_em": 0.24242424242424243,
"mean_similarity": 0.7432461308710503,
"mean_word_count_diff": 3.272727272727273,
"similarity_boolean": 0.7965274572372436,
"similarity_extraction": 0.6360977619886399,
"similarity_qa": 0.8315042436122895,
"similarity_summarization": 0.7279509454965591,
"similarity_unanswerable": 0.7344108195975423,
"word_count_diff_boolean": 0.0,
"word_count_diff_extraction": 3.4,
"word_count_diff_qa": 4.2,
"word_count_diff_summarization": 15.25,
"word_count_diff_unanswerable": 2.46875
},
{
"epoch": 23.0,
"grad_norm": 0.1313343644142151,
"learning_rate": 1.1666666666666668e-05,
"loss": 0.0005,
"step": 4278
},
{
"epoch": 23.0,
"eval_loss": 0.05678420513868332,
"eval_runtime": 4.3484,
"eval_samples_per_second": 15.178,
"eval_steps_per_second": 2.07,
"step": 4278
},
{
"em_boolean": 0.5,
"em_extraction": 0.5,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.21875,
"epoch": 24.0,
"mean_em": 0.25757575757575757,
"mean_similarity": 0.733292786009384,
"mean_word_count_diff": 3.3333333333333335,
"similarity_boolean": 0.6607738882303238,
"similarity_extraction": 0.6927396818995476,
"similarity_qa": 0.8479106187820434,
"similarity_summarization": 0.6271217167377472,
"similarity_unanswerable": 0.7460810975171626,
"word_count_diff_boolean": 1.4,
"word_count_diff_extraction": 2.5,
"word_count_diff_qa": 4.7,
"word_count_diff_summarization": 17.75,
"word_count_diff_unanswerable": 1.96875
},
{
"epoch": 24.0,
"grad_norm": 0.00035786593798547983,
"learning_rate": 1e-05,
"loss": 0.0004,
"step": 4464
},
{
"epoch": 24.0,
"eval_loss": 0.058857183903455734,
"eval_runtime": 4.3529,
"eval_samples_per_second": 15.162,
"eval_steps_per_second": 2.068,
"step": 4464
},
{
"em_boolean": 0.7,
"em_extraction": 0.5,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.21875,
"epoch": 25.0,
"mean_em": 0.2878787878787879,
"mean_similarity": 0.7524795749535164,
"mean_word_count_diff": 3.6515151515151514,
"similarity_boolean": 0.7810821939259768,
"similarity_extraction": 0.6710324764251709,
"similarity_qa": 0.820537006855011,
"similarity_summarization": 0.6545907258987427,
"similarity_unanswerable": 0.75996163347736,
"word_count_diff_boolean": 1.7,
"word_count_diff_extraction": 3.1,
"word_count_diff_qa": 3.8,
"word_count_diff_summarization": 17.75,
"word_count_diff_unanswerable": 2.625
},
{
"epoch": 25.0,
"grad_norm": 0.2973511815071106,
"learning_rate": 8.333333333333334e-06,
"loss": 0.0006,
"step": 4650
},
{
"epoch": 25.0,
"eval_loss": 0.05786564573645592,
"eval_runtime": 4.3502,
"eval_samples_per_second": 15.172,
"eval_steps_per_second": 2.069,
"step": 4650
},
{
"em_boolean": 0.5,
"em_extraction": 0.6,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.25,
"epoch": 26.0,
"mean_em": 0.2878787878787879,
"mean_similarity": 0.7584869705817916,
"mean_word_count_diff": 3.0303030303030303,
"similarity_boolean": 0.7514585584402085,
"similarity_extraction": 0.7340315699577331,
"similarity_qa": 0.8186799943447113,
"similarity_summarization": 0.7270446717739105,
"similarity_unanswerable": 0.7534456294961274,
"word_count_diff_boolean": 0.0,
"word_count_diff_extraction": 2.4,
"word_count_diff_qa": 4.2,
"word_count_diff_summarization": 17.0,
"word_count_diff_unanswerable": 2.0625
},
{
"epoch": 26.0,
"grad_norm": 0.03420831263065338,
"learning_rate": 6.666666666666667e-06,
"loss": 0.0005,
"step": 4836
},
{
"epoch": 26.0,
"eval_loss": 0.05816643312573433,
"eval_runtime": 4.3476,
"eval_samples_per_second": 15.181,
"eval_steps_per_second": 2.07,
"step": 4836
},
{
"em_boolean": 0.6,
"em_extraction": 0.4,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.21875,
"epoch": 27.0,
"mean_em": 0.25757575757575757,
"mean_similarity": 0.7683728897210323,
"mean_word_count_diff": 3.227272727272727,
"similarity_boolean": 0.823177945613861,
"similarity_extraction": 0.674461716413498,
"similarity_qa": 0.8566352903842926,
"similarity_summarization": 0.6613511592149734,
"similarity_unanswerable": 0.7663892675191164,
"word_count_diff_boolean": 0.0,
"word_count_diff_extraction": 2.8,
"word_count_diff_qa": 4.6,
"word_count_diff_summarization": 17.25,
"word_count_diff_unanswerable": 2.1875
},
{
"epoch": 27.0,
"grad_norm": 0.20164477825164795,
"learning_rate": 5e-06,
"loss": 0.0006,
"step": 5022
},
{
"epoch": 27.0,
"eval_loss": 0.05884123966097832,
"eval_runtime": 4.3217,
"eval_samples_per_second": 15.272,
"eval_steps_per_second": 2.083,
"step": 5022
},
{
"em_boolean": 0.7,
"em_extraction": 0.5,
"em_qa": 0.1,
"em_summarization": 0.0,
"em_unanswerable": 0.21875,
"epoch": 28.0,
"mean_em": 0.30303030303030304,
"mean_similarity": 0.7534916117561586,
"mean_word_count_diff": 3.378787878787879,
"similarity_boolean": 0.8725614547729492,
"similarity_extraction": 0.6645052798092366,
"similarity_qa": 0.8722499251365662,
"similarity_summarization": 0.6813686788082123,
"similarity_unanswerable": 0.7159939082339406,
"word_count_diff_boolean": 0.0,
"word_count_diff_extraction": 4.4,
"word_count_diff_qa": 3.3,
"word_count_diff_summarization": 15.75,
"word_count_diff_unanswerable": 2.59375
},
{
"epoch": 28.0,
"grad_norm": 0.09254211187362671,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0005,
"step": 5208
},
{
"epoch": 28.0,
"eval_loss": 0.05950010567903519,
"eval_runtime": 4.3472,
"eval_samples_per_second": 15.182,
"eval_steps_per_second": 2.07,
"step": 5208
},
{
"em_boolean": 0.3,
"em_extraction": 0.5,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.15625,
"epoch": 29.0,
"mean_em": 0.19696969696969696,
"mean_similarity": 0.7206367963755672,
"mean_word_count_diff": 3.257575757575758,
"similarity_boolean": 0.5599547553807497,
"similarity_extraction": 0.6752738565206527,
"similarity_qa": 0.826910001039505,
"similarity_summarization": 0.6975099295377731,
"similarity_unanswerable": 0.7547063347883523,
"word_count_diff_boolean": 2.1,
"word_count_diff_extraction": 3.4,
"word_count_diff_qa": 3.3,
"word_count_diff_summarization": 15.5,
"word_count_diff_unanswerable": 2.03125
},
{
"epoch": 29.0,
"grad_norm": 0.002610220806673169,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0003,
"step": 5394
},
{
"epoch": 29.0,
"eval_loss": 0.05941936746239662,
"eval_runtime": 4.3317,
"eval_samples_per_second": 15.236,
"eval_steps_per_second": 2.078,
"step": 5394
},
{
"em_boolean": 0.6,
"em_extraction": 0.4,
"em_qa": 0.0,
"em_summarization": 0.0,
"em_unanswerable": 0.1875,
"epoch": 30.0,
"mean_em": 0.24242424242424243,
"mean_similarity": 0.7168677732574217,
"mean_word_count_diff": 4.393939393939394,
"similarity_boolean": 0.7321859009563922,
"similarity_extraction": 0.5674642950296402,
"similarity_qa": 0.8223672747612,
"similarity_summarization": 0.6349779516458511,
"similarity_unanswerable": 0.7360370787791908,
"word_count_diff_boolean": 1.7,
"word_count_diff_extraction": 5.9,
"word_count_diff_qa": 4.9,
"word_count_diff_summarization": 18.0,
"word_count_diff_unanswerable": 2.90625
},
{
"epoch": 30.0,
"grad_norm": 0.08226452767848969,
"learning_rate": 0.0,
"loss": 0.0004,
"step": 5580
},
{
"epoch": 30.0,
"eval_loss": 0.05968955159187317,
"eval_runtime": 4.3488,
"eval_samples_per_second": 15.177,
"eval_steps_per_second": 2.07,
"step": 5580
}
],
"logging_steps": 500,
"max_steps": 5580,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.572121033146368e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}