|
{ |
|
"best_metric": 0.02781366929411888, |
|
"best_model_checkpoint": "./teapotllm/checkpoint-372", |
|
"epoch": 30.0, |
|
"eval_steps": 500, |
|
"global_step": 5580, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"em_boolean": 0.4, |
|
"em_extraction": 0.5, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.0, |
|
"epoch": 1.0, |
|
"mean_em": 0.13636363636363635, |
|
"mean_similarity": 0.7151702378848286, |
|
"mean_word_count_diff": 4.0606060606060606, |
|
"similarity_boolean": 0.6859854146838188, |
|
"similarity_extraction": 0.6776353690773249, |
|
"similarity_qa": 0.7660255491733551, |
|
"similarity_summarization": 0.7428409457206726, |
|
"similarity_unanswerable": 0.7166690183803439, |
|
"word_count_diff_boolean": 1.9, |
|
"word_count_diff_extraction": 3.1, |
|
"word_count_diff_qa": 4.7, |
|
"word_count_diff_summarization": 19.25, |
|
"word_count_diff_unanswerable": 2.9375 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.1633770614862442, |
|
"learning_rate": 4.8333333333333334e-05, |
|
"loss": 0.0597, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.02993270382285118, |
|
"eval_runtime": 4.3173, |
|
"eval_samples_per_second": 15.287, |
|
"eval_steps_per_second": 2.085, |
|
"step": 186 |
|
}, |
|
{ |
|
"em_boolean": 0.4, |
|
"em_extraction": 0.4, |
|
"em_qa": 0.2, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.03125, |
|
"epoch": 2.0, |
|
"mean_em": 0.16666666666666666, |
|
"mean_similarity": 0.6823387349193747, |
|
"mean_word_count_diff": 4.393939393939394, |
|
"similarity_boolean": 0.6429030410945415, |
|
"similarity_extraction": 0.5662426881492137, |
|
"similarity_qa": 0.8177247762680053, |
|
"similarity_summarization": 0.6609117537736893, |
|
"similarity_unanswerable": 0.6913126385770738, |
|
"word_count_diff_boolean": 2.0, |
|
"word_count_diff_extraction": 4.5, |
|
"word_count_diff_qa": 4.4, |
|
"word_count_diff_summarization": 18.25, |
|
"word_count_diff_unanswerable": 3.375 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.11337712407112122, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.0236, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.02781366929411888, |
|
"eval_runtime": 4.3427, |
|
"eval_samples_per_second": 15.198, |
|
"eval_steps_per_second": 2.072, |
|
"step": 372 |
|
}, |
|
{ |
|
"em_boolean": 0.7, |
|
"em_extraction": 0.4, |
|
"em_qa": 0.1, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.03125, |
|
"epoch": 3.0, |
|
"mean_em": 0.19696969696969696, |
|
"mean_similarity": 0.7136092173556486, |
|
"mean_word_count_diff": 4.0606060606060606, |
|
"similarity_boolean": 0.7808200724422931, |
|
"similarity_extraction": 0.5972317367792129, |
|
"similarity_qa": 0.8595586240291595, |
|
"similarity_summarization": 0.624700665473938, |
|
"similarity_unanswerable": 0.6944781672209501, |
|
"word_count_diff_boolean": 1.2, |
|
"word_count_diff_extraction": 5.3, |
|
"word_count_diff_qa": 3.5, |
|
"word_count_diff_summarization": 18.0, |
|
"word_count_diff_unanswerable": 3.0 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.3930008113384247, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.015, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.02964785322546959, |
|
"eval_runtime": 4.3399, |
|
"eval_samples_per_second": 15.208, |
|
"eval_steps_per_second": 2.074, |
|
"step": 558 |
|
}, |
|
{ |
|
"em_boolean": 0.6, |
|
"em_extraction": 0.3, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.09375, |
|
"epoch": 4.0, |
|
"mean_em": 0.18181818181818182, |
|
"mean_similarity": 0.7020406423305924, |
|
"mean_word_count_diff": 4.287878787878788, |
|
"similarity_boolean": 0.7741398263722659, |
|
"similarity_extraction": 0.5548319160938263, |
|
"similarity_qa": 0.8464474320411682, |
|
"similarity_summarization": 0.6931064277887344, |
|
"similarity_unanswerable": 0.681502029299736, |
|
"word_count_diff_boolean": 1.4, |
|
"word_count_diff_extraction": 4.6, |
|
"word_count_diff_qa": 3.8, |
|
"word_count_diff_summarization": 18.75, |
|
"word_count_diff_unanswerable": 3.4375 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.2295440137386322, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.0102, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.03305710479617119, |
|
"eval_runtime": 4.326, |
|
"eval_samples_per_second": 15.257, |
|
"eval_steps_per_second": 2.08, |
|
"step": 744 |
|
}, |
|
{ |
|
"em_boolean": 0.6, |
|
"em_extraction": 0.4, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.125, |
|
"epoch": 5.0, |
|
"mean_em": 0.21212121212121213, |
|
"mean_similarity": 0.7231389704075727, |
|
"mean_word_count_diff": 4.363636363636363, |
|
"similarity_boolean": 0.7543415576219559, |
|
"similarity_extraction": 0.5749588221311569, |
|
"similarity_qa": 0.7916842460632324, |
|
"similarity_summarization": 0.8001345098018646, |
|
"similarity_unanswerable": 0.7286496171727777, |
|
"word_count_diff_boolean": 1.6, |
|
"word_count_diff_extraction": 5.9, |
|
"word_count_diff_qa": 6.0, |
|
"word_count_diff_summarization": 15.75, |
|
"word_count_diff_unanswerable": 2.8125 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.16206224262714386, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.0073, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.03383969888091087, |
|
"eval_runtime": 4.3253, |
|
"eval_samples_per_second": 15.259, |
|
"eval_steps_per_second": 2.081, |
|
"step": 930 |
|
}, |
|
{ |
|
"em_boolean": 0.5, |
|
"em_extraction": 0.4, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.09375, |
|
"epoch": 6.0, |
|
"mean_em": 0.18181818181818182, |
|
"mean_similarity": 0.7190481367887873, |
|
"mean_word_count_diff": 3.712121212121212, |
|
"similarity_boolean": 0.7263102680444717, |
|
"similarity_extraction": 0.5906288996338844, |
|
"similarity_qa": 0.8479191601276398, |
|
"similarity_summarization": 0.7078308612108231, |
|
"similarity_unanswerable": 0.7180396970361471, |
|
"word_count_diff_boolean": 1.6, |
|
"word_count_diff_extraction": 4.1, |
|
"word_count_diff_qa": 3.7, |
|
"word_count_diff_summarization": 18.5, |
|
"word_count_diff_unanswerable": 2.40625 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.11092964559793472, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0063, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.036639757454395294, |
|
"eval_runtime": 4.3411, |
|
"eval_samples_per_second": 15.204, |
|
"eval_steps_per_second": 2.073, |
|
"step": 1116 |
|
}, |
|
{ |
|
"em_boolean": 0.5, |
|
"em_extraction": 0.4, |
|
"em_qa": 0.1, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.09375, |
|
"epoch": 7.0, |
|
"mean_em": 0.19696969696969696, |
|
"mean_similarity": 0.7222502547683138, |
|
"mean_word_count_diff": 3.4696969696969697, |
|
"similarity_boolean": 0.7063896596431732, |
|
"similarity_extraction": 0.613977988064289, |
|
"similarity_qa": 0.8624586880207061, |
|
"similarity_summarization": 0.7022114843130112, |
|
"similarity_unanswerable": 0.7197314850054681, |
|
"word_count_diff_boolean": 0.0, |
|
"word_count_diff_extraction": 3.8, |
|
"word_count_diff_qa": 4.0, |
|
"word_count_diff_summarization": 18.25, |
|
"word_count_diff_unanswerable": 2.4375 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.0025342688895761967, |
|
"learning_rate": 3.8333333333333334e-05, |
|
"loss": 0.0051, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.03565499931573868, |
|
"eval_runtime": 4.3456, |
|
"eval_samples_per_second": 15.188, |
|
"eval_steps_per_second": 2.071, |
|
"step": 1302 |
|
}, |
|
{ |
|
"em_boolean": 0.7, |
|
"em_extraction": 0.5, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.09375, |
|
"epoch": 8.0, |
|
"mean_em": 0.22727272727272727, |
|
"mean_similarity": 0.7595055665482174, |
|
"mean_word_count_diff": 3.6363636363636362, |
|
"similarity_boolean": 0.827492555975914, |
|
"similarity_extraction": 0.6385330930352211, |
|
"similarity_qa": 0.8492773473262787, |
|
"similarity_summarization": 0.7314918637275696, |
|
"similarity_unanswerable": 0.7515115616843104, |
|
"word_count_diff_boolean": 0.0, |
|
"word_count_diff_extraction": 3.0, |
|
"word_count_diff_qa": 4.7, |
|
"word_count_diff_summarization": 17.75, |
|
"word_count_diff_unanswerable": 2.875 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.015746578574180603, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.0059, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.03911704570055008, |
|
"eval_runtime": 4.3337, |
|
"eval_samples_per_second": 15.229, |
|
"eval_steps_per_second": 2.077, |
|
"step": 1488 |
|
}, |
|
{ |
|
"em_boolean": 0.8, |
|
"em_extraction": 0.5, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.1875, |
|
"epoch": 9.0, |
|
"mean_em": 0.2878787878787879, |
|
"mean_similarity": 0.7606699131993633, |
|
"mean_word_count_diff": 3.621212121212121, |
|
"similarity_boolean": 0.8291448082774877, |
|
"similarity_extraction": 0.7344626411795616, |
|
"similarity_qa": 0.8531238198280334, |
|
"similarity_summarization": 0.6869091093540192, |
|
"similarity_unanswerable": 0.7277895356528461, |
|
"word_count_diff_boolean": 1.7, |
|
"word_count_diff_extraction": 2.5, |
|
"word_count_diff_qa": 4.3, |
|
"word_count_diff_summarization": 18.25, |
|
"word_count_diff_unanswerable": 2.53125 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.2743629217147827, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0032, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.04249922186136246, |
|
"eval_runtime": 4.3316, |
|
"eval_samples_per_second": 15.237, |
|
"eval_steps_per_second": 2.078, |
|
"step": 1674 |
|
}, |
|
{ |
|
"em_boolean": 0.9, |
|
"em_extraction": 0.5, |
|
"em_qa": 0.1, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.1875, |
|
"epoch": 10.0, |
|
"mean_em": 0.3181818181818182, |
|
"mean_similarity": 0.7545153351895737, |
|
"mean_word_count_diff": 3.5606060606060606, |
|
"similarity_boolean": 0.9258624315261841, |
|
"similarity_extraction": 0.6548894591629505, |
|
"similarity_qa": 0.7920802533626556, |
|
"similarity_summarization": 0.7080870717763901, |
|
"similarity_unanswerable": 0.7261669498402625, |
|
"word_count_diff_boolean": 0.0, |
|
"word_count_diff_extraction": 3.5, |
|
"word_count_diff_qa": 4.3, |
|
"word_count_diff_summarization": 17.75, |
|
"word_count_diff_unanswerable": 2.6875 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.19264960289001465, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.0023, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.044133350253105164, |
|
"eval_runtime": 4.332, |
|
"eval_samples_per_second": 15.236, |
|
"eval_steps_per_second": 2.078, |
|
"step": 1860 |
|
}, |
|
{ |
|
"em_boolean": 0.7, |
|
"em_extraction": 0.5, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.21875, |
|
"epoch": 11.0, |
|
"mean_em": 0.2878787878787879, |
|
"mean_similarity": 0.7554914088863315, |
|
"mean_word_count_diff": 3.4545454545454546, |
|
"similarity_boolean": 0.8498284339904785, |
|
"similarity_extraction": 0.6388141810894012, |
|
"similarity_qa": 0.8062415808439255, |
|
"similarity_summarization": 0.7252775132656097, |
|
"similarity_unanswerable": 0.7503900304436684, |
|
"word_count_diff_boolean": 0.0, |
|
"word_count_diff_extraction": 3.4, |
|
"word_count_diff_qa": 5.3, |
|
"word_count_diff_summarization": 15.25, |
|
"word_count_diff_unanswerable": 2.5 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.026405496522784233, |
|
"learning_rate": 3.1666666666666666e-05, |
|
"loss": 0.0018, |
|
"step": 2046 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.04769740626215935, |
|
"eval_runtime": 4.3231, |
|
"eval_samples_per_second": 15.267, |
|
"eval_steps_per_second": 2.082, |
|
"step": 2046 |
|
}, |
|
{ |
|
"em_boolean": 0.4, |
|
"em_extraction": 0.5, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.09375, |
|
"epoch": 12.0, |
|
"mean_em": 0.18181818181818182, |
|
"mean_similarity": 0.70488141804482, |
|
"mean_word_count_diff": 3.712121212121212, |
|
"similarity_boolean": 0.6591130249202252, |
|
"similarity_extraction": 0.7003911420702934, |
|
"similarity_qa": 0.7918749868869781, |
|
"similarity_summarization": 0.6370653212070465, |
|
"similarity_unanswerable": 0.7018787739798427, |
|
"word_count_diff_boolean": 1.6, |
|
"word_count_diff_extraction": 3.2, |
|
"word_count_diff_qa": 3.5, |
|
"word_count_diff_summarization": 18.75, |
|
"word_count_diff_unanswerable": 2.71875 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.009582425467669964, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0021, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.04564819857478142, |
|
"eval_runtime": 4.3388, |
|
"eval_samples_per_second": 15.211, |
|
"eval_steps_per_second": 2.074, |
|
"step": 2232 |
|
}, |
|
{ |
|
"em_boolean": 0.6, |
|
"em_extraction": 0.4, |
|
"em_qa": 0.1, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.21875, |
|
"epoch": 13.0, |
|
"mean_em": 0.2727272727272727, |
|
"mean_similarity": 0.732733571506811, |
|
"mean_word_count_diff": 3.5303030303030303, |
|
"similarity_boolean": 0.7299912668764591, |
|
"similarity_extraction": 0.6099361270666123, |
|
"similarity_qa": 0.8338733792304993, |
|
"similarity_summarization": 0.6718393042683601, |
|
"similarity_unanswerable": 0.7479703365825117, |
|
"word_count_diff_boolean": 1.4, |
|
"word_count_diff_extraction": 4.2, |
|
"word_count_diff_qa": 3.3, |
|
"word_count_diff_summarization": 16.75, |
|
"word_count_diff_unanswerable": 2.40625 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.015427525155246258, |
|
"learning_rate": 2.8333333333333335e-05, |
|
"loss": 0.006, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.0508425235748291, |
|
"eval_runtime": 4.3235, |
|
"eval_samples_per_second": 15.265, |
|
"eval_steps_per_second": 2.082, |
|
"step": 2418 |
|
}, |
|
{ |
|
"em_boolean": 0.7, |
|
"em_extraction": 0.5, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.1875, |
|
"epoch": 14.0, |
|
"mean_em": 0.2727272727272727, |
|
"mean_similarity": 0.7405690112800309, |
|
"mean_word_count_diff": 3.4242424242424243, |
|
"similarity_boolean": 0.8725614547729492, |
|
"similarity_extraction": 0.6553262293338775, |
|
"similarity_qa": 0.7748158514499665, |
|
"similarity_summarization": 0.7374982982873917, |
|
"similarity_unanswerable": 0.7156414436176419, |
|
"word_count_diff_boolean": 0.0, |
|
"word_count_diff_extraction": 3.3, |
|
"word_count_diff_qa": 5.1, |
|
"word_count_diff_summarization": 18.0, |
|
"word_count_diff_unanswerable": 2.1875 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.02267725020647049, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.0014, |
|
"step": 2604 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.0475737489759922, |
|
"eval_runtime": 4.3517, |
|
"eval_samples_per_second": 15.166, |
|
"eval_steps_per_second": 2.068, |
|
"step": 2604 |
|
}, |
|
{ |
|
"em_boolean": 0.5, |
|
"em_extraction": 0.6, |
|
"em_qa": 0.1, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.21875, |
|
"epoch": 15.0, |
|
"mean_em": 0.2878787878787879, |
|
"mean_similarity": 0.7692180305267825, |
|
"mean_word_count_diff": 3.1666666666666665, |
|
"similarity_boolean": 0.7965274572372436, |
|
"similarity_extraction": 0.7734156735241413, |
|
"similarity_qa": 0.8109998881816864, |
|
"similarity_summarization": 0.6901126950979233, |
|
"similarity_unanswerable": 0.7562034076545388, |
|
"word_count_diff_boolean": 0.0, |
|
"word_count_diff_extraction": 1.5, |
|
"word_count_diff_qa": 3.8, |
|
"word_count_diff_summarization": 18.5, |
|
"word_count_diff_unanswerable": 2.5625 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.04372488334774971, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0014, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.04670372232794762, |
|
"eval_runtime": 4.3278, |
|
"eval_samples_per_second": 15.25, |
|
"eval_steps_per_second": 2.08, |
|
"step": 2790 |
|
}, |
|
{ |
|
"em_boolean": 0.4, |
|
"em_extraction": 0.7, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.21875, |
|
"epoch": 16.0, |
|
"mean_em": 0.2727272727272727, |
|
"mean_similarity": 0.7683560192923654, |
|
"mean_word_count_diff": 3.5757575757575757, |
|
"similarity_boolean": 0.6552163552492857, |
|
"similarity_extraction": 0.7891500160098076, |
|
"similarity_qa": 0.8301358222961426, |
|
"similarity_summarization": 0.7006907612085342, |
|
"similarity_unanswerable": 0.7863660091534257, |
|
"word_count_diff_boolean": 1.7, |
|
"word_count_diff_extraction": 1.9, |
|
"word_count_diff_qa": 4.9, |
|
"word_count_diff_summarization": 21.25, |
|
"word_count_diff_unanswerable": 2.0625 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.017858723178505898, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.0013, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.0506870336830616, |
|
"eval_runtime": 4.3168, |
|
"eval_samples_per_second": 15.289, |
|
"eval_steps_per_second": 2.085, |
|
"step": 2976 |
|
}, |
|
{ |
|
"em_boolean": 0.4, |
|
"em_extraction": 0.3, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.21875, |
|
"epoch": 17.0, |
|
"mean_em": 0.21212121212121213, |
|
"mean_similarity": 0.7196075666808721, |
|
"mean_word_count_diff": 3.803030303030303, |
|
"similarity_boolean": 0.6327047817409038, |
|
"similarity_extraction": 0.6252625986933709, |
|
"similarity_qa": 0.8158903121948242, |
|
"similarity_summarization": 0.6718292534351349, |
|
"similarity_unanswerable": 0.7521314206533134, |
|
"word_count_diff_boolean": 1.6, |
|
"word_count_diff_extraction": 3.6, |
|
"word_count_diff_qa": 3.9, |
|
"word_count_diff_summarization": 18.25, |
|
"word_count_diff_unanswerable": 2.71875 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.0030904498416930437, |
|
"learning_rate": 2.1666666666666667e-05, |
|
"loss": 0.0009, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.05454666167497635, |
|
"eval_runtime": 4.3302, |
|
"eval_samples_per_second": 15.242, |
|
"eval_steps_per_second": 2.078, |
|
"step": 3162 |
|
}, |
|
{ |
|
"em_boolean": 0.4, |
|
"em_extraction": 0.5, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.1875, |
|
"epoch": 18.0, |
|
"mean_em": 0.22727272727272727, |
|
"mean_similarity": 0.7074560875524626, |
|
"mean_word_count_diff": 3.6818181818181817, |
|
"similarity_boolean": 0.6091547535732389, |
|
"similarity_extraction": 0.6446325138211251, |
|
"similarity_qa": 0.8525063633918762, |
|
"similarity_summarization": 0.6083656400442123, |
|
"similarity_unanswerable": 0.7248657159507275, |
|
"word_count_diff_boolean": 1.7, |
|
"word_count_diff_extraction": 2.6, |
|
"word_count_diff_qa": 4.0, |
|
"word_count_diff_summarization": 18.75, |
|
"word_count_diff_unanswerable": 2.65625 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.0558372437953949, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0008, |
|
"step": 3348 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.054993122816085815, |
|
"eval_runtime": 4.3426, |
|
"eval_samples_per_second": 15.198, |
|
"eval_steps_per_second": 2.073, |
|
"step": 3348 |
|
}, |
|
{ |
|
"em_boolean": 0.6, |
|
"em_extraction": 0.6, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.21875, |
|
"epoch": 19.0, |
|
"mean_em": 0.2878787878787879, |
|
"mean_similarity": 0.7609088637612083, |
|
"mean_word_count_diff": 3.196969696969697, |
|
"similarity_boolean": 0.7781090468168259, |
|
"similarity_extraction": 0.7332883447408676, |
|
"similarity_qa": 0.7839098423719406, |
|
"similarity_summarization": 0.6715002506971359, |
|
"similarity_unanswerable": 0.7681534895673394, |
|
"word_count_diff_boolean": 0.0, |
|
"word_count_diff_extraction": 1.9, |
|
"word_count_diff_qa": 4.7, |
|
"word_count_diff_summarization": 18.25, |
|
"word_count_diff_unanswerable": 2.25 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.2534376084804535, |
|
"learning_rate": 1.8333333333333333e-05, |
|
"loss": 0.001, |
|
"step": 3534 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.05550788715481758, |
|
"eval_runtime": 4.3349, |
|
"eval_samples_per_second": 15.225, |
|
"eval_steps_per_second": 2.076, |
|
"step": 3534 |
|
}, |
|
{ |
|
"em_boolean": 0.5, |
|
"em_extraction": 0.5, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.21875, |
|
"epoch": 20.0, |
|
"mean_em": 0.25757575757575757, |
|
"mean_similarity": 0.7586507165070736, |
|
"mean_word_count_diff": 3.5454545454545454, |
|
"similarity_boolean": 0.6818089708685875, |
|
"similarity_extraction": 0.6717077270150185, |
|
"similarity_qa": 0.8592052400112152, |
|
"similarity_summarization": 0.6685295552015305, |
|
"similarity_unanswerable": 0.7896753028035164, |
|
"word_count_diff_boolean": 1.7, |
|
"word_count_diff_extraction": 2.1, |
|
"word_count_diff_qa": 4.4, |
|
"word_count_diff_summarization": 18.0, |
|
"word_count_diff_unanswerable": 2.5 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.011831851676106453, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0008, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.053514618426561356, |
|
"eval_runtime": 4.3467, |
|
"eval_samples_per_second": 15.184, |
|
"eval_steps_per_second": 2.071, |
|
"step": 3720 |
|
}, |
|
{ |
|
"em_boolean": 0.8, |
|
"em_extraction": 0.5, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.21875, |
|
"epoch": 21.0, |
|
"mean_em": 0.30303030303030304, |
|
"mean_similarity": 0.7693727000644712, |
|
"mean_word_count_diff": 3.212121212121212, |
|
"similarity_boolean": 0.8541430443525314, |
|
"similarity_extraction": 0.7111731946468354, |
|
"similarity_qa": 0.8433568000793457, |
|
"similarity_summarization": 0.538779329508543, |
|
"similarity_unanswerable": 0.7667734529823065, |
|
"word_count_diff_boolean": 0.0, |
|
"word_count_diff_extraction": 1.8, |
|
"word_count_diff_qa": 4.5, |
|
"word_count_diff_summarization": 17.0, |
|
"word_count_diff_unanswerable": 2.53125 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"grad_norm": 0.021487198770046234, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0007, |
|
"step": 3906 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 0.05830617994070053, |
|
"eval_runtime": 4.3417, |
|
"eval_samples_per_second": 15.201, |
|
"eval_steps_per_second": 2.073, |
|
"step": 3906 |
|
}, |
|
{ |
|
"em_boolean": 0.5, |
|
"em_extraction": 0.3, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.21875, |
|
"epoch": 22.0, |
|
"mean_em": 0.22727272727272727, |
|
"mean_similarity": 0.7141596485267986, |
|
"mean_word_count_diff": 3.5303030303030303, |
|
"similarity_boolean": 0.7537968754768372, |
|
"similarity_extraction": 0.5005216524004936, |
|
"similarity_qa": 0.8255742013454437, |
|
"similarity_summarization": 0.6753722429275513, |
|
"similarity_unanswerable": 0.7385662668384612, |
|
"word_count_diff_boolean": 1.2, |
|
"word_count_diff_extraction": 3.1, |
|
"word_count_diff_qa": 3.6, |
|
"word_count_diff_summarization": 18.75, |
|
"word_count_diff_unanswerable": 2.46875 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 0.001079858047887683, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.0009, |
|
"step": 4092 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.0558372400701046, |
|
"eval_runtime": 4.3409, |
|
"eval_samples_per_second": 15.204, |
|
"eval_steps_per_second": 2.073, |
|
"step": 4092 |
|
}, |
|
{ |
|
"em_boolean": 0.5, |
|
"em_extraction": 0.4, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.21875, |
|
"epoch": 23.0, |
|
"mean_em": 0.24242424242424243, |
|
"mean_similarity": 0.7432461308710503, |
|
"mean_word_count_diff": 3.272727272727273, |
|
"similarity_boolean": 0.7965274572372436, |
|
"similarity_extraction": 0.6360977619886399, |
|
"similarity_qa": 0.8315042436122895, |
|
"similarity_summarization": 0.7279509454965591, |
|
"similarity_unanswerable": 0.7344108195975423, |
|
"word_count_diff_boolean": 0.0, |
|
"word_count_diff_extraction": 3.4, |
|
"word_count_diff_qa": 4.2, |
|
"word_count_diff_summarization": 15.25, |
|
"word_count_diff_unanswerable": 2.46875 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"grad_norm": 0.1313343644142151, |
|
"learning_rate": 1.1666666666666668e-05, |
|
"loss": 0.0005, |
|
"step": 4278 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 0.05678420513868332, |
|
"eval_runtime": 4.3484, |
|
"eval_samples_per_second": 15.178, |
|
"eval_steps_per_second": 2.07, |
|
"step": 4278 |
|
}, |
|
{ |
|
"em_boolean": 0.5, |
|
"em_extraction": 0.5, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.21875, |
|
"epoch": 24.0, |
|
"mean_em": 0.25757575757575757, |
|
"mean_similarity": 0.733292786009384, |
|
"mean_word_count_diff": 3.3333333333333335, |
|
"similarity_boolean": 0.6607738882303238, |
|
"similarity_extraction": 0.6927396818995476, |
|
"similarity_qa": 0.8479106187820434, |
|
"similarity_summarization": 0.6271217167377472, |
|
"similarity_unanswerable": 0.7460810975171626, |
|
"word_count_diff_boolean": 1.4, |
|
"word_count_diff_extraction": 2.5, |
|
"word_count_diff_qa": 4.7, |
|
"word_count_diff_summarization": 17.75, |
|
"word_count_diff_unanswerable": 1.96875 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 0.00035786593798547983, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0004, |
|
"step": 4464 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.058857183903455734, |
|
"eval_runtime": 4.3529, |
|
"eval_samples_per_second": 15.162, |
|
"eval_steps_per_second": 2.068, |
|
"step": 4464 |
|
}, |
|
{ |
|
"em_boolean": 0.7, |
|
"em_extraction": 0.5, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.21875, |
|
"epoch": 25.0, |
|
"mean_em": 0.2878787878787879, |
|
"mean_similarity": 0.7524795749535164, |
|
"mean_word_count_diff": 3.6515151515151514, |
|
"similarity_boolean": 0.7810821939259768, |
|
"similarity_extraction": 0.6710324764251709, |
|
"similarity_qa": 0.820537006855011, |
|
"similarity_summarization": 0.6545907258987427, |
|
"similarity_unanswerable": 0.75996163347736, |
|
"word_count_diff_boolean": 1.7, |
|
"word_count_diff_extraction": 3.1, |
|
"word_count_diff_qa": 3.8, |
|
"word_count_diff_summarization": 17.75, |
|
"word_count_diff_unanswerable": 2.625 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 0.2973511815071106, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0006, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 0.05786564573645592, |
|
"eval_runtime": 4.3502, |
|
"eval_samples_per_second": 15.172, |
|
"eval_steps_per_second": 2.069, |
|
"step": 4650 |
|
}, |
|
{ |
|
"em_boolean": 0.5, |
|
"em_extraction": 0.6, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.25, |
|
"epoch": 26.0, |
|
"mean_em": 0.2878787878787879, |
|
"mean_similarity": 0.7584869705817916, |
|
"mean_word_count_diff": 3.0303030303030303, |
|
"similarity_boolean": 0.7514585584402085, |
|
"similarity_extraction": 0.7340315699577331, |
|
"similarity_qa": 0.8186799943447113, |
|
"similarity_summarization": 0.7270446717739105, |
|
"similarity_unanswerable": 0.7534456294961274, |
|
"word_count_diff_boolean": 0.0, |
|
"word_count_diff_extraction": 2.4, |
|
"word_count_diff_qa": 4.2, |
|
"word_count_diff_summarization": 17.0, |
|
"word_count_diff_unanswerable": 2.0625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"grad_norm": 0.03420831263065338, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.0005, |
|
"step": 4836 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 0.05816643312573433, |
|
"eval_runtime": 4.3476, |
|
"eval_samples_per_second": 15.181, |
|
"eval_steps_per_second": 2.07, |
|
"step": 4836 |
|
}, |
|
{ |
|
"em_boolean": 0.6, |
|
"em_extraction": 0.4, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.21875, |
|
"epoch": 27.0, |
|
"mean_em": 0.25757575757575757, |
|
"mean_similarity": 0.7683728897210323, |
|
"mean_word_count_diff": 3.227272727272727, |
|
"similarity_boolean": 0.823177945613861, |
|
"similarity_extraction": 0.674461716413498, |
|
"similarity_qa": 0.8566352903842926, |
|
"similarity_summarization": 0.6613511592149734, |
|
"similarity_unanswerable": 0.7663892675191164, |
|
"word_count_diff_boolean": 0.0, |
|
"word_count_diff_extraction": 2.8, |
|
"word_count_diff_qa": 4.6, |
|
"word_count_diff_summarization": 17.25, |
|
"word_count_diff_unanswerable": 2.1875 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"grad_norm": 0.20164477825164795, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0006, |
|
"step": 5022 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 0.05884123966097832, |
|
"eval_runtime": 4.3217, |
|
"eval_samples_per_second": 15.272, |
|
"eval_steps_per_second": 2.083, |
|
"step": 5022 |
|
}, |
|
{ |
|
"em_boolean": 0.7, |
|
"em_extraction": 0.5, |
|
"em_qa": 0.1, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.21875, |
|
"epoch": 28.0, |
|
"mean_em": 0.30303030303030304, |
|
"mean_similarity": 0.7534916117561586, |
|
"mean_word_count_diff": 3.378787878787879, |
|
"similarity_boolean": 0.8725614547729492, |
|
"similarity_extraction": 0.6645052798092366, |
|
"similarity_qa": 0.8722499251365662, |
|
"similarity_summarization": 0.6813686788082123, |
|
"similarity_unanswerable": 0.7159939082339406, |
|
"word_count_diff_boolean": 0.0, |
|
"word_count_diff_extraction": 4.4, |
|
"word_count_diff_qa": 3.3, |
|
"word_count_diff_summarization": 15.75, |
|
"word_count_diff_unanswerable": 2.59375 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 0.09254211187362671, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0005, |
|
"step": 5208 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 0.05950010567903519, |
|
"eval_runtime": 4.3472, |
|
"eval_samples_per_second": 15.182, |
|
"eval_steps_per_second": 2.07, |
|
"step": 5208 |
|
}, |
|
{ |
|
"em_boolean": 0.3, |
|
"em_extraction": 0.5, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.15625, |
|
"epoch": 29.0, |
|
"mean_em": 0.19696969696969696, |
|
"mean_similarity": 0.7206367963755672, |
|
"mean_word_count_diff": 3.257575757575758, |
|
"similarity_boolean": 0.5599547553807497, |
|
"similarity_extraction": 0.6752738565206527, |
|
"similarity_qa": 0.826910001039505, |
|
"similarity_summarization": 0.6975099295377731, |
|
"similarity_unanswerable": 0.7547063347883523, |
|
"word_count_diff_boolean": 2.1, |
|
"word_count_diff_extraction": 3.4, |
|
"word_count_diff_qa": 3.3, |
|
"word_count_diff_summarization": 15.5, |
|
"word_count_diff_unanswerable": 2.03125 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"grad_norm": 0.002610220806673169, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.0003, |
|
"step": 5394 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 0.05941936746239662, |
|
"eval_runtime": 4.3317, |
|
"eval_samples_per_second": 15.236, |
|
"eval_steps_per_second": 2.078, |
|
"step": 5394 |
|
}, |
|
{ |
|
"em_boolean": 0.6, |
|
"em_extraction": 0.4, |
|
"em_qa": 0.0, |
|
"em_summarization": 0.0, |
|
"em_unanswerable": 0.1875, |
|
"epoch": 30.0, |
|
"mean_em": 0.24242424242424243, |
|
"mean_similarity": 0.7168677732574217, |
|
"mean_word_count_diff": 4.393939393939394, |
|
"similarity_boolean": 0.7321859009563922, |
|
"similarity_extraction": 0.5674642950296402, |
|
"similarity_qa": 0.8223672747612, |
|
"similarity_summarization": 0.6349779516458511, |
|
"similarity_unanswerable": 0.7360370787791908, |
|
"word_count_diff_boolean": 1.7, |
|
"word_count_diff_extraction": 5.9, |
|
"word_count_diff_qa": 4.9, |
|
"word_count_diff_summarization": 18.0, |
|
"word_count_diff_unanswerable": 2.90625 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 0.08226452767848969, |
|
"learning_rate": 0.0, |
|
"loss": 0.0004, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 0.05968955159187317, |
|
"eval_runtime": 4.3488, |
|
"eval_samples_per_second": 15.177, |
|
"eval_steps_per_second": 2.07, |
|
"step": 5580 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5580, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.572121033146368e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|