Upload results.json
Browse files- results.json +1 -1
results.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"validation_set": {"ndcg_at_1": 0.808, "ndcg_at_3": 0.86064, "ndcg_at_5": 0.87217, "ndcg_at_10": 0.87935, "ndcg_at_20": 0.8819, "ndcg_at_50": 0.8855, "ndcg_at_100": 0.88709, "map_at_1": 0.808, "map_at_3": 0.84833, "map_at_5": 0.85473, "map_at_10": 0.85773, "map_at_20": 0.85844, "map_at_50": 0.85904, "map_at_100": 0.85917, "recall_at_1": 0.808, "recall_at_3": 0.896, "recall_at_5": 0.924, "recall_at_10": 0.946, "recall_at_20": 0.956, "recall_at_50": 0.974, "recall_at_100": 0.984, "precision_at_1": 0.808, "precision_at_3": 0.29867, "precision_at_5": 0.1848, "precision_at_10": 0.0946, "precision_at_20": 0.0478, "precision_at_50": 0.01948, "precision_at_100": 0.00984, "mrr_at_1": 0.812, "mrr_at_3": 0.8503333333333334, "mrr_at_5": 0.8566333333333334, "mrr_at_10": 0.8593904761904761, "mrr_at_20": 0.8602524031524031, "mrr_at_50": 0.8609465715630416, "mrr_at_100": 0.8611005335017311, "naucs_at_1_max": 0.3515086743587615, "naucs_at_1_std": 0.03249771727400904, "naucs_at_1_diff1": 0.9474194820287203, "naucs_at_3_max": 0.5962878076950838, "naucs_at_3_std": 0.3982711326066551, "naucs_at_3_diff1": 0.9212715657258372, "naucs_at_5_max": 0.6614698510983347, "naucs_at_5_std": 0.6334095041525363, "naucs_at_5_diff1": 0.9211877733549554, "naucs_at_10_max": 0.6447418473562261, "naucs_at_10_std": 0.7033405954974616, "naucs_at_10_diff1": 0.9290209911124954, "naucs_at_20_max": 0.6058059587471368, "naucs_at_20_std": 0.6962906374671136, "naucs_at_20_diff1": 0.9307147101264778, "naucs_at_50_max": 0.6038210155857129, "naucs_at_50_std": 0.7816921640451006, "naucs_at_50_diff1": 0.9384112619406629, "naucs_at_100_max": 0.663923902894502, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 0.9325980392156906}, "syntheticDocQA_energy": {"ndcg_at_1": 0.93, "ndcg_at_3": 0.94893, "ndcg_at_5": 0.95666, "ndcg_at_10": 0.95956, "ndcg_at_20": 0.95956, "ndcg_at_50": 0.96171, "ndcg_at_100": 0.96171, "map_at_1": 0.93, "map_at_3": 0.945, "map_at_5": 0.949, "map_at_10": 0.95, "map_at_20": 0.95, "map_at_50": 0.95042, "map_at_100": 0.95042, "recall_at_1": 0.93, "recall_at_3": 0.96, "recall_at_5": 0.98, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.93, "precision_at_3": 0.32, "precision_at_5": 0.196, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.93, "mrr_at_3": 0.9483333333333333, "mrr_at_5": 0.9503333333333334, "mrr_at_10": 0.9513333333333334, "mrr_at_20": 0.9513333333333334, "mrr_at_50": 0.9517878787878787, "mrr_at_100": 0.9517878787878787, "naucs_at_1_max": 0.5745631585967725, "naucs_at_1_std": -0.473856209150329, "naucs_at_1_diff1": 0.9626517273576113, "naucs_at_3_max": 0.7414799253034536, "naucs_at_3_std": -0.28408029878618163, "naucs_at_3_diff1": 0.9346405228758151, "naucs_at_5_max": 0.8692810457516353, "naucs_at_5_std": 0.24042950513538955, "naucs_at_5_diff1": 0.9346405228758136, "naucs_at_10_max": 0.8692810457516413, "naucs_at_10_std": 0.35807656395891135, "naucs_at_10_diff1": 0.8692810457516413, "naucs_at_20_max": 0.8692810457516413, "naucs_at_20_std": 0.35807656395891135, "naucs_at_20_diff1": 0.8692810457516413, "naucs_at_50_max": NaN, "naucs_at_50_std": NaN, "naucs_at_50_diff1": NaN, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN}, "syntheticDocQA_healthcare_industry": {"ndcg_at_1": 0.95, "ndcg_at_3": 0.98155, "ndcg_at_5": 0.98155, "ndcg_at_10": 0.98155, "ndcg_at_20": 0.98155, "ndcg_at_50": 0.98155, "ndcg_at_100": 0.98155, "map_at_1": 0.95, "map_at_3": 0.975, "map_at_5": 0.975, "map_at_10": 0.975, "map_at_20": 0.975, "map_at_50": 0.975, "map_at_100": 0.975, "recall_at_1": 0.95, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.95, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.95, "mrr_at_3": 0.975, "mrr_at_5": 0.975, "mrr_at_10": 0.975, "mrr_at_20": 0.975, "mrr_at_50": 0.975, "mrr_at_100": 0.975, "naucs_at_1_max": 0.7777777777777775, "naucs_at_1_std": 0.1309056956115747, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": NaN, "naucs_at_50_std": NaN, "naucs_at_50_diff1": NaN, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN}, "syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.98, "ndcg_at_3": 0.98631, "ndcg_at_5": 0.99062, "ndcg_at_10": 0.99062, "ndcg_at_20": 0.99062, "ndcg_at_50": 0.99062, "ndcg_at_100": 0.99062, "map_at_1": 0.98, "map_at_3": 0.985, "map_at_5": 0.9875, "map_at_10": 0.9875, "map_at_20": 0.9875, "map_at_50": 0.9875, "map_at_100": 0.9875, "recall_at_1": 0.98, "recall_at_3": 0.99, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.98, "precision_at_3": 0.33, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.98, "mrr_at_3": 0.985, "mrr_at_5": 0.9875, "mrr_at_10": 0.9875, "mrr_at_20": 0.9875, "mrr_at_50": 0.9875, "mrr_at_100": 0.9875, "naucs_at_1_max": 0.7770774976657365, "naucs_at_1_std": -0.22035480859010095, "naucs_at_1_diff1": 0.9346405228758133, "naucs_at_3_max": 1.0, "naucs_at_3_std": 0.12278244631183229, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": NaN, "naucs_at_50_std": NaN, "naucs_at_50_diff1": NaN, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN}, "syntheticDocQA_government_reports": {"ndcg_at_1": 0.86, "ndcg_at_3": 0.93309, "ndcg_at_5": 0.9374, "ndcg_at_10": 0.94055, "ndcg_at_20": 0.94055, "ndcg_at_50": 0.94055, "ndcg_at_100": 0.94055, "map_at_1": 0.86, "map_at_3": 0.91667, "map_at_5": 0.91917, "map_at_10": 0.92042, "map_at_20": 0.92042, "map_at_50": 0.92042, "map_at_100": 0.92042, "recall_at_1": 0.86, "recall_at_3": 0.98, "recall_at_5": 0.99, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.86, "precision_at_3": 0.32667, "precision_at_5": 0.198, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.87, "mrr_at_3": 0.925, "mrr_at_5": 0.925, "mrr_at_10": 0.92625, "mrr_at_20": 0.92625, "mrr_at_50": 0.92625, "mrr_at_100": 0.92625, "naucs_at_1_max": 0.6605930442011902, "naucs_at_1_std": 0.45541776361368935, "naucs_at_1_diff1": 0.880836912844672, "naucs_at_3_max": 1.0, "naucs_at_3_std": 0.5401493930905577, "naucs_at_3_diff1": 0.8692810457516296, "naucs_at_5_max": 1.0, "naucs_at_5_std": 0.7222222222222276, "naucs_at_5_diff1": 0.8692810457516413, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": NaN, "naucs_at_50_std": NaN, "naucs_at_50_diff1": NaN, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN}, "infovqa_subsampled": {"ndcg_at_1": 0.85, "ndcg_at_3": 0.88928, "ndcg_at_5": 0.89918, "ndcg_at_10": 0.90366, "ndcg_at_20": 0.90816, "ndcg_at_50": 0.9101, "ndcg_at_100": 0.91073, "map_at_1": 0.85, "map_at_3": 0.88, "map_at_5": 0.8855, "map_at_10": 0.88732, "map_at_20": 0.88853, "map_at_50": 0.88883, "map_at_100": 0.88887, "recall_at_1": 0.85, "recall_at_3": 0.916, "recall_at_5": 0.94, "recall_at_10": 0.954, "recall_at_20": 0.972, "recall_at_50": 0.982, "recall_at_100": 0.986, "precision_at_1": 0.85, "precision_at_3": 0.30533, "precision_at_5": 0.188, "precision_at_10": 0.0954, "precision_at_20": 0.0486, "precision_at_50": 0.01964, "precision_at_100": 0.00986, "mrr_at_1": 0.846, "mrr_at_3": 0.8786666666666666, "mrr_at_5": 0.8841666666666667, "mrr_at_10": 0.8859690476190476, "mrr_at_20": 0.8871696210222526, "mrr_at_50": 0.887457817094473, "mrr_at_100": 0.8875065832519349, "naucs_at_1_max": 0.6576374877969413, "naucs_at_1_std": 0.09139602993817167, "naucs_at_1_diff1": 0.9287341360234302, "naucs_at_3_max": 0.5740851896314072, "naucs_at_3_std": -0.0630919034280346, "naucs_at_3_diff1": 0.8925125605797886, "naucs_at_5_max": 0.8596327419856851, "naucs_at_5_std": 0.36778711484593446, "naucs_at_5_diff1": 0.8903672580143169, "naucs_at_10_max": 0.9162708561685533, "naucs_at_10_std": 0.6028498355864041, "naucs_at_10_diff1": 0.9102829537612158, "naucs_at_20_max": 0.9354741896758731, "naucs_at_20_std": 0.6860744297719135, "naucs_at_20_diff1": 0.9428104575163434, "naucs_at_50_max": 0.9709513435003653, "naucs_at_50_std": 0.8124805477746678, "naucs_at_50_diff1": 0.9564270152505423, "naucs_at_100_max": 0.9626517273576126, "naucs_at_100_std": 0.8382686407896341, "naucs_at_100_diff1": 0.9439775910364117}, "docvqa_subsampled": {"ndcg_at_1": 0.45, "ndcg_at_3": 0.5229, "ndcg_at_5": 0.54071, "ndcg_at_10": 0.56169, "ndcg_at_20": 0.57183, "ndcg_at_50": 0.58375, "ndcg_at_100": 0.5922, "map_at_1": 0.45, "map_at_3": 0.504, "map_at_5": 0.5137, "map_at_10": 0.52214, "map_at_20": 0.52493, "map_at_50": 0.52686, "map_at_100": 0.5276, "recall_at_1": 0.45, "recall_at_3": 0.578, "recall_at_5": 0.622, "recall_at_10": 0.688, "recall_at_20": 0.728, "recall_at_50": 0.788, "recall_at_100": 0.84, "precision_at_1": 0.45, "precision_at_3": 0.19267, "precision_at_5": 0.1244, "precision_at_10": 0.0688, "precision_at_20": 0.0364, "precision_at_50": 0.01576, "precision_at_100": 0.0084, "mrr_at_1": 0.452, "mrr_at_3": 0.5063333333333333, "mrr_at_5": 0.5163333333333334, "mrr_at_10": 0.5238119047619048, "mrr_at_20": 0.5271139985401595, "mrr_at_50": 0.5290200749111619, "mrr_at_100": 0.5299254540408582, "naucs_at_1_max": 0.5212021213906897, "naucs_at_1_std": 0.6806049891966216, "naucs_at_1_diff1": 0.8806495122110918, "naucs_at_3_max": 0.47156835207348163, "naucs_at_3_std": 0.8128002501903935, "naucs_at_3_diff1": 0.8088765466775932, "naucs_at_5_max": 0.4225936642138315, "naucs_at_5_std": 0.8492139709674136, "naucs_at_5_diff1": 0.8048344413805454, "naucs_at_10_max": 0.35700736285971163, "naucs_at_10_std": 0.8817867580935254, "naucs_at_10_diff1": 0.7719340152877696, "naucs_at_20_max": 0.28175167648851784, "naucs_at_20_std": 0.888878902036797, "naucs_at_20_diff1": 0.7607623528676154, "naucs_at_50_max": 0.23570044204275323, "naucs_at_50_std": 0.9034666462989657, "naucs_at_50_diff1": 0.7529716156996781, "naucs_at_100_max": 0.3394943544428075, "naucs_at_100_std": 0.9148932253313685, "naucs_at_100_diff1": 0.7261659302896408}, "arxivqa_subsampled": {"ndcg_at_1": 0.798, "ndcg_at_3": 0.85012, "ndcg_at_5": 0.86165, "ndcg_at_10": 0.86811, "ndcg_at_20": 0.87503, "ndcg_at_50": 0.88012, "ndcg_at_100": 0.88145, "map_at_1": 0.798, "map_at_3": 0.83767, "map_at_5": 0.84407, "map_at_10": 0.84672, "map_at_20": 0.84881, "map_at_50": 0.84959, "map_at_100": 0.84972, "recall_at_1": 0.798, "recall_at_3": 0.886, "recall_at_5": 0.914, "recall_at_10": 0.934, "recall_at_20": 0.96, "recall_at_50": 0.986, "recall_at_100": 0.994, "precision_at_1": 0.798, "precision_at_3": 0.29533, "precision_at_5": 0.1828, "precision_at_10": 0.0934, "precision_at_20": 0.048, "precision_at_50": 0.01972, "precision_at_100": 0.00994, "mrr_at_1": 0.796, "mrr_at_3": 0.837, "mrr_at_5": 0.8426, "mrr_at_10": 0.8454293650793651, "mrr_at_20": 0.8476594700621016, "mrr_at_50": 0.8484996653641844, "mrr_at_100": 0.8485908734354632, "naucs_at_1_max": 0.7429305020644663, "naucs_at_1_std": 0.12389279642324286, "naucs_at_1_diff1": 0.9221054092501996, "naucs_at_3_max": 0.7559059050726685, "naucs_at_3_std": 0.08804335159072413, "naucs_at_3_diff1": 0.9016514891704258, "naucs_at_5_max": 0.7771372114737365, "naucs_at_5_std": 0.08682387683755402, "naucs_at_5_diff1": 0.8953922654333033, "naucs_at_10_max": 0.7693036810683858, "naucs_at_10_std": 0.019508813626457466, "naucs_at_10_diff1": 0.9052853464618175, "naucs_at_20_max": 0.8720821661998126, "naucs_at_20_std": 0.4474089635854335, "naucs_at_20_diff1": 0.9319561157796447, "naucs_at_50_max": 0.8856209150326733, "naucs_at_50_std": 0.2575696945444932, "naucs_at_50_diff1": 1.0, "naucs_at_100_max": 0.9564270152505304, "naucs_at_100_std": 0.9564270152505304, "naucs_at_100_diff1": 1.0}, "tabfquad_subsampled": {"ndcg_at_1": 0.83214, "ndcg_at_3": 0.87712, "ndcg_at_5": 0.89235, "ndcg_at_10": 0.89806, "ndcg_at_20": 0.90534, "ndcg_at_50": 0.90678, "ndcg_at_100": 0.90738, "map_at_1": 0.83214, "map_at_3": 0.86667, "map_at_5": 0.87542, "map_at_10": 0.87773, "map_at_20": 0.87975, "map_at_50": 0.87999, "map_at_100": 0.88005, "recall_at_1": 0.83214, "recall_at_3": 0.90714, "recall_at_5": 0.94286, "recall_at_10": 0.96071, "recall_at_20": 0.98929, "recall_at_50": 0.99643, "recall_at_100": 1.0, "precision_at_1": 0.83214, "precision_at_3": 0.30238, "precision_at_5": 0.18857, "precision_at_10": 0.09607, "precision_at_20": 0.04946, "precision_at_50": 0.01993, "precision_at_100": 0.01, "mrr_at_1": 0.8285714285714286, "mrr_at_3": 0.8660714285714286, "mrr_at_5": 0.8739285714285714, "mrr_at_10": 0.8762386621315194, "mrr_at_20": 0.8782158477426059, "mrr_at_50": 0.8784424960942542, "mrr_at_100": 0.8785030287819054, "naucs_at_1_max": 0.4932986555362122, "naucs_at_1_std": 0.22308450615600825, "naucs_at_1_diff1": 0.8669798854793715, "naucs_at_3_max": 0.5318358112475757, "naucs_at_3_std": 0.22662141779788908, "naucs_at_3_diff1": 0.8142641672053457, "naucs_at_5_max": 0.5299077964519151, "naucs_at_5_std": 0.37307422969187604, "naucs_at_5_diff1": 0.7965102707749776, "naucs_at_10_max": 0.4598506069094349, "naucs_at_10_std": 0.4182582123758572, "naucs_at_10_diff1": 0.7816823699176663, "naucs_at_20_max": 0.6640211640211727, "naucs_at_20_std": 0.7424525365701908, "naucs_at_20_diff1": 0.807812013694365, "naucs_at_50_max": 0.8692810457515607, "naucs_at_50_std": 0.8692810457515607, "naucs_at_50_diff1": 0.5541549953314449, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0}, "tatdqa": {"ndcg_at_1": 0.63139, "ndcg_at_3": 0.73814, "ndcg_at_5": 0.76197, "ndcg_at_10": 0.78059, "ndcg_at_20": 0.78639, "ndcg_at_50": 0.79151, "ndcg_at_100": 0.79287, "map_at_1": 0.63139, "map_at_3": 0.71277, "map_at_5": 0.72603, "map_at_10": 0.7338, "map_at_20": 0.73539, "map_at_50": 0.73626, "map_at_100": 0.73638, "recall_at_1": 0.63139, "recall_at_3": 0.81118, "recall_at_5": 0.86891, "recall_at_10": 0.92604, "recall_at_20": 0.94889, "recall_at_50": 0.97414, "recall_at_100": 0.98256, "precision_at_1": 0.63139, "precision_at_3": 0.27039, "precision_at_5": 0.17378, "precision_at_10": 0.0926, "precision_at_20": 0.04744, "precision_at_50": 0.01948, "precision_at_100": 0.00983, "mrr_at_1": 0.6301864101022249, "mrr_at_3": 0.7114652234916817, "mrr_at_5": 0.7254459811585489, "mrr_at_10": 0.7328489343221755, "mrr_at_20": 0.7346180226268433, "mrr_at_50": 0.7354215808563426, "mrr_at_100": 0.7355602372931767, "naucs_at_1_max": 0.15602344202646737, "naucs_at_1_std": -0.23537913065678764, "naucs_at_1_diff1": 0.7995887679987748, "naucs_at_3_max": 0.19948401483418163, "naucs_at_3_std": -0.17461182695422356, "naucs_at_3_diff1": 0.697068028325409, "naucs_at_5_max": 0.2702733206987723, "naucs_at_5_std": -0.07922361501648759, "naucs_at_5_diff1": 0.6924299266004303, "naucs_at_10_max": 0.362723107586991, "naucs_at_10_std": 0.16342176544180184, "naucs_at_10_diff1": 0.6704828732981863, "naucs_at_20_max": 0.44771077518715946, "naucs_at_20_std": 0.31862795330756494, "naucs_at_20_diff1": 0.6391884425916661, "naucs_at_50_max": 0.5273988645048515, "naucs_at_50_std": 0.6199789044521115, "naucs_at_50_diff1": 0.5861085850281988, "naucs_at_100_max": 0.5130808147698965, "naucs_at_100_std": 0.5869401169922615, "naucs_at_100_diff1": 0.5890188930623028}, "shift_project": {"ndcg_at_1": 0.73, "ndcg_at_3": 0.84702, "ndcg_at_5": 0.86293, "ndcg_at_10": 0.86942, "ndcg_at_20": 0.86942, "ndcg_at_50": 0.87131, "ndcg_at_100": 0.87131, "map_at_1": 0.73, "map_at_3": 0.81833, "map_at_5": 0.82683, "map_at_10": 0.82951, "map_at_20": 0.82951, "map_at_50": 0.82978, "map_at_100": 0.82978, "recall_at_1": 0.73, "recall_at_3": 0.93, "recall_at_5": 0.97, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.73, "precision_at_3": 0.31, "precision_at_5": 0.194, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.76, "mrr_at_3": 0.8383333333333333, "mrr_at_5": 0.8468333333333333, "mrr_at_10": 0.8497499999999999, "mrr_at_20": 0.8497499999999999, "mrr_at_50": 0.8500131578947367, "mrr_at_100": 0.8500131578947367, "naucs_at_1_max": -0.1763746092905308, "naucs_at_1_std": -0.39348129509131297, "naucs_at_1_diff1": 0.6756374216123774, "naucs_at_3_max": -0.018540749633185687, "naucs_at_3_std": -0.1494597839135643, "naucs_at_3_diff1": 0.5246098439375775, "naucs_at_5_max": -0.20572673513849474, "naucs_at_5_std": -0.4344849050731363, "naucs_at_5_diff1": 0.7152194211017747, "naucs_at_10_max": -1.7399626517273863, "naucs_at_10_std": -1.7399626517273863, "naucs_at_10_diff1": 0.8692810457516413, "naucs_at_20_max": -1.7399626517273863, "naucs_at_20_std": -1.7399626517273863, "naucs_at_20_diff1": 0.8692810457516413, "naucs_at_50_max": NaN, "naucs_at_50_std": NaN, "naucs_at_50_diff1": NaN, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN}}
|
|
|
1 |
+
{"tatdqa_test": {"ndcg_at_1": 0.63609, "ndcg_at_3": 0.74203, "ndcg_at_5": 0.768, "ndcg_at_10": 0.78607, "ndcg_at_20": 0.79225, "ndcg_at_100": 0.79794, "ndcg_at_1000": 0.7999, "map_at_1": 0.63609, "map_at_3": 0.71699, "map_at_5": 0.73151, "map_at_10": 0.73908, "map_at_20": 0.74079, "map_at_100": 0.74164, "map_at_1000": 0.74174, "recall_at_1": 0.63609, "recall_at_3": 0.81409, "recall_at_5": 0.87667, "recall_at_10": 0.93196, "recall_at_20": 0.95626, "recall_at_100": 0.98603, "recall_at_1000": 1.0, "precision_at_1": 0.63609, "precision_at_3": 0.27136, "precision_at_5": 0.17533, "precision_at_10": 0.0932, "precision_at_20": 0.04781, "precision_at_100": 0.00986, "precision_at_1000": 0.001, "mrr_at_1": 0.6385176184690158, "mrr_at_3": 0.7178007290400972, "mrr_at_5": 0.7320473876063184, "mrr_at_10": 0.7396892900538101, "mrr_at_20": 0.7415524953882554, "mrr_at_100": 0.7423961819039153, "mrr_at_1000": 0.7424935268539138, "naucs_at_1_max": 0.15885418581164265, "naucs_at_1_std": -0.24722089872760156, "naucs_at_1_diff1": 0.8058133847238692, "naucs_at_3_max": 0.1716690156858292, "naucs_at_3_std": -0.23136832603804022, "naucs_at_3_diff1": 0.7036045863253637, "naucs_at_5_max": 0.23427257157466833, "naucs_at_5_std": -0.14847651919138252, "naucs_at_5_diff1": 0.6841422539102495, "naucs_at_10_max": 0.3098360565704996, "naucs_at_10_std": 0.086527322442512, "naucs_at_10_diff1": 0.6375151476027249, "naucs_at_20_max": 0.35048069176464824, "naucs_at_20_std": 0.20141013843756972, "naucs_at_20_diff1": 0.6104227641459228, "naucs_at_100_max": 0.38053080427833685, "naucs_at_100_std": 0.4793427449913261, "naucs_at_100_diff1": 0.4570863373115207, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "shiftproject_test": {"ndcg_at_1": 0.75, "ndcg_at_3": 0.85202, "ndcg_at_5": 0.87355, "ndcg_at_10": 0.87972, "ndcg_at_20": 0.87972, "ndcg_at_100": 0.8816, "ndcg_at_1000": 0.8816, "map_at_1": 0.75, "map_at_3": 0.82833, "map_at_5": 0.84083, "map_at_10": 0.84319, "map_at_20": 0.84319, "map_at_100": 0.84345, "map_at_1000": 0.84345, "recall_at_1": 0.75, "recall_at_3": 0.92, "recall_at_5": 0.97, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.75, "precision_at_3": 0.30667, "precision_at_5": 0.194, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.75, "mrr_at_3": 0.8333333333333333, "mrr_at_5": 0.8428333333333333, "mrr_at_10": 0.8455119047619047, "mrr_at_20": 0.8455119047619047, "mrr_at_100": 0.8457619047619047, "mrr_at_1000": 0.8457619047619047, "naucs_at_1_max": -0.1358032443746735, "naucs_at_1_std": -0.3977394034536896, "naucs_at_1_diff1": 0.6171847200418629, "naucs_at_3_max": 0.15429505135387495, "naucs_at_3_std": -0.00840336134453759, "naucs_at_3_diff1": 0.40581232492997227, "naucs_at_5_max": -0.20572673513849474, "naucs_at_5_std": -0.4344849050731363, "naucs_at_5_diff1": 0.6374105197634568, "naucs_at_10_max": -1.7399626517273863, "naucs_at_10_std": -1.7399626517273863, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": -1.7399626517273863, "naucs_at_20_std": -1.7399626517273863, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.98, "ndcg_at_3": 0.985, "ndcg_at_5": 0.98931, "ndcg_at_10": 0.98931, "ndcg_at_20": 0.98931, "ndcg_at_100": 0.98931, "ndcg_at_1000": 0.98931, "map_at_1": 0.98, "map_at_3": 0.98333, "map_at_5": 0.98583, "map_at_10": 0.98583, "map_at_20": 0.98583, "map_at_100": 0.98583, "map_at_1000": 0.98583, "recall_at_1": 0.98, "recall_at_3": 0.99, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.98, "precision_at_3": 0.33, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.98, "mrr_at_3": 0.985, "mrr_at_5": 0.9875, "mrr_at_10": 0.9875, "mrr_at_20": 0.9875, "mrr_at_100": 0.9875, "mrr_at_1000": 0.9875, "naucs_at_1_max": 0.8611111111111092, "naucs_at_1_std": -0.367413632119516, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 1.0, "naucs_at_3_std": -0.1713352007469878, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_government_reports_test": {"ndcg_at_1": 0.85, "ndcg_at_3": 0.9294, "ndcg_at_5": 0.93371, "ndcg_at_10": 0.93672, "ndcg_at_20": 0.93672, "ndcg_at_100": 0.93672, "ndcg_at_1000": 0.93672, "map_at_1": 0.85, "map_at_3": 0.91167, "map_at_5": 0.91417, "map_at_10": 0.91528, "map_at_20": 0.91528, "map_at_100": 0.91528, "map_at_1000": 0.91528, "recall_at_1": 0.85, "recall_at_3": 0.98, "recall_at_5": 0.99, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.85, "precision_at_3": 0.32667, "precision_at_5": 0.198, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.86, "mrr_at_3": 0.9166666666666667, "mrr_at_5": 0.9191666666666667, "mrr_at_10": 0.9204166666666667, "mrr_at_20": 0.9204166666666667, "mrr_at_100": 0.9204166666666667, "mrr_at_1000": 0.9204166666666667, "naucs_at_1_max": 0.6291246339082338, "naucs_at_1_std": 0.3065408395704526, "naucs_at_1_diff1": 0.8960299381711682, "naucs_at_3_max": 1.0, "naucs_at_3_std": 0.5401493930905577, "naucs_at_3_diff1": 0.9346405228758099, "naucs_at_5_max": 1.0, "naucs_at_5_std": 0.7222222222222276, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "arxivqa_test_subsampled": {"ndcg_at_1": 0.794, "ndcg_at_3": 0.85059, "ndcg_at_5": 0.86058, "ndcg_at_10": 0.86646, "ndcg_at_20": 0.87266, "ndcg_at_100": 0.87945, "ndcg_at_1000": 0.88028, "map_at_1": 0.794, "map_at_3": 0.83633, "map_at_5": 0.84193, "map_at_10": 0.84439, "map_at_20": 0.84617, "map_at_100": 0.84714, "map_at_1000": 0.84718, "recall_at_1": 0.794, "recall_at_3": 0.892, "recall_at_5": 0.916, "recall_at_10": 0.934, "recall_at_20": 0.958, "recall_at_100": 0.994, "recall_at_1000": 1.0, "precision_at_1": 0.794, "precision_at_3": 0.29733, "precision_at_5": 0.1832, "precision_at_10": 0.0934, "precision_at_20": 0.0479, "precision_at_100": 0.00994, "precision_at_1000": 0.001, "mrr_at_1": 0.79, "mrr_at_3": 0.8376666666666667, "mrr_at_5": 0.8412666666666666, "mrr_at_10": 0.8437269841269841, "mrr_at_20": 0.8457550827827144, "mrr_at_100": 0.846530343562146, "mrr_at_1000": 0.8465733155735391, "naucs_at_1_max": 0.7325193686810741, "naucs_at_1_std": 0.14078724474104354, "naucs_at_1_diff1": 0.9224520481739498, "naucs_at_3_max": 0.7665170932709435, "naucs_at_3_std": 0.14292698257499137, "naucs_at_3_diff1": 0.8843279673150137, "naucs_at_5_max": 0.7890600684718345, "naucs_at_5_std": 0.14575830332132672, "naucs_at_5_diff1": 0.8787848472722453, "naucs_at_10_max": 0.7557932263814637, "naucs_at_10_std": 0.08308575955634576, "naucs_at_10_diff1": 0.9008290184760781, "naucs_at_20_max": 0.8719487795118063, "naucs_at_20_std": 0.3432484104753018, "naucs_at_20_diff1": 0.914965986394557, "naucs_at_100_max": 0.9564270152505304, "naucs_at_100_std": 0.9564270152505304, "naucs_at_100_diff1": 0.9074074074073771, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "docvqa_test_subsampled": {"ndcg_at_1": 0.50111, "ndcg_at_3": 0.57079, "ndcg_at_5": 0.5935, "ndcg_at_10": 0.60991, "ndcg_at_20": 0.62193, "ndcg_at_100": 0.64089, "ndcg_at_1000": 0.65609, "map_at_1": 0.50111, "map_at_3": 0.55285, "map_at_5": 0.56537, "map_at_10": 0.57209, "map_at_20": 0.57552, "map_at_100": 0.578, "map_at_1000": 0.57855, "recall_at_1": 0.50111, "recall_at_3": 0.62306, "recall_at_5": 0.67849, "recall_at_10": 0.72949, "recall_at_20": 0.77605, "recall_at_100": 0.88027, "recall_at_1000": 1.0, "precision_at_1": 0.50111, "precision_at_3": 0.20769, "precision_at_5": 0.1357, "precision_at_10": 0.07295, "precision_at_20": 0.0388, "precision_at_100": 0.0088, "precision_at_1000": 0.001, "mrr_at_1": 0.5033259423503326, "mrr_at_3": 0.5543237250554324, "mrr_at_5": 0.5655210643015521, "mrr_at_10": 0.5733132720937598, "mrr_at_20": 0.5761961809258666, "mrr_at_100": 0.5788361852971281, "mrr_at_1000": 0.5793368852941103, "naucs_at_1_max": 0.45963973840308553, "naucs_at_1_std": 0.6437091894280429, "naucs_at_1_diff1": 0.8759915182195691, "naucs_at_3_max": 0.41721087400698254, "naucs_at_3_std": 0.7560034210706996, "naucs_at_3_diff1": 0.8333236691100351, "naucs_at_5_max": 0.3576379403501726, "naucs_at_5_std": 0.8142067757233517, "naucs_at_5_diff1": 0.8324148371504299, "naucs_at_10_max": 0.3077029630906093, "naucs_at_10_std": 0.8506353277336524, "naucs_at_10_diff1": 0.823879626321585, "naucs_at_20_max": 0.23370693314651628, "naucs_at_20_std": 0.883867635902675, "naucs_at_20_diff1": 0.8055657709900838, "naucs_at_100_max": 0.2482445957598351, "naucs_at_100_std": 0.9020390341179869, "naucs_at_100_diff1": 0.7987699160382137, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "syntheticDocQA_healthcare_industry_test": {"ndcg_at_1": 0.95, "ndcg_at_3": 0.98155, "ndcg_at_5": 0.98155, "ndcg_at_10": 0.98155, "ndcg_at_20": 0.98155, "ndcg_at_100": 0.98155, "ndcg_at_1000": 0.98155, "map_at_1": 0.95, "map_at_3": 0.975, "map_at_5": 0.975, "map_at_10": 0.975, "map_at_20": 0.975, "map_at_100": 0.975, "map_at_1000": 0.975, "recall_at_1": 0.95, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.95, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.95, "mrr_at_3": 0.975, "mrr_at_5": 0.975, "mrr_at_10": 0.975, "mrr_at_20": 0.975, "mrr_at_100": 0.975, "mrr_at_1000": 0.975, "naucs_at_1_max": 0.8888888888888874, "naucs_at_1_std": 0.43286647992530286, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "infovqa_test_subsampled": {"ndcg_at_1": 0.85628, "ndcg_at_3": 0.89784, "ndcg_at_5": 0.90708, "ndcg_at_10": 0.90959, "ndcg_at_20": 0.91531, "ndcg_at_100": 0.91797, "ndcg_at_1000": 0.91892, "map_at_1": 0.85628, "map_at_3": 0.88833, "map_at_5": 0.89349, "map_at_10": 0.89446, "map_at_20": 0.89608, "map_at_100": 0.89645, "map_at_1000": 0.89647, "recall_at_1": 0.85628, "recall_at_3": 0.9251, "recall_at_5": 0.94737, "recall_at_10": 0.95547, "recall_at_20": 0.97773, "recall_at_100": 0.9919, "recall_at_1000": 1.0, "precision_at_1": 0.85628, "precision_at_3": 0.30837, "precision_at_5": 0.18947, "precision_at_10": 0.09555, "precision_at_20": 0.04889, "precision_at_100": 0.00992, "precision_at_1000": 0.001, "mrr_at_1": 0.8522267206477733, "mrr_at_3": 0.888663967611336, "mrr_at_5": 0.8919028340080972, "mrr_at_10": 0.89323227941649, "mrr_at_20": 0.8948389638926076, "mrr_at_100": 0.8952180496071648, "mrr_at_1000": 0.8952400720283006, "naucs_at_1_max": 0.6091042742691317, "naucs_at_1_std": 0.06828770581523343, "naucs_at_1_diff1": 0.9363834323697563, "naucs_at_3_max": 0.5528244624347816, "naucs_at_3_std": -0.03701021164354884, "naucs_at_3_diff1": 0.9011217865736105, "naucs_at_5_max": 0.8026139487593744, "naucs_at_5_std": 0.27628703636986895, "naucs_at_5_diff1": 0.911427543567563, "naucs_at_10_max": 0.8451953613222364, "naucs_at_10_std": 0.4649226732413682, "naucs_at_10_diff1": 0.9012598903100626, "naucs_at_20_max": 0.9178862612438203, "naucs_at_20_std": 0.689729386374777, "naucs_at_20_diff1": 0.927259949634811, "naucs_at_100_max": 0.9346992729676393, "naucs_at_100_std": 0.8559234324931341, "naucs_at_100_diff1": 0.9346992729676393, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "syntheticDocQA_energy_test": {"ndcg_at_1": 0.95, "ndcg_at_3": 0.96262, "ndcg_at_5": 0.96649, "ndcg_at_10": 0.96964, "ndcg_at_20": 0.96964, "ndcg_at_100": 0.97185, "ndcg_at_1000": 0.97185, "map_at_1": 0.95, "map_at_3": 0.96, "map_at_5": 0.962, "map_at_10": 0.96325, "map_at_20": 0.96325, "map_at_100": 0.9637, "map_at_1000": 0.9637, "recall_at_1": 0.95, "recall_at_3": 0.97, "recall_at_5": 0.98, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.95, "precision_at_3": 0.32333, "precision_at_5": 0.196, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.95, "mrr_at_3": 0.9583333333333333, "mrr_at_5": 0.9603333333333333, "mrr_at_10": 0.9620000000000001, "mrr_at_20": 0.9620000000000001, "mrr_at_100": 0.9624545454545455, "mrr_at_1000": 0.9624545454545455, "naucs_at_1_max": 0.5589169000933719, "naucs_at_1_std": -0.5752567693744151, "naucs_at_1_diff1": 0.9738562091503253, "naucs_at_3_max": 0.8692810457516398, "naucs_at_3_std": 0.20121381886088488, "naucs_at_3_diff1": 0.9564270152505466, "naucs_at_5_max": 0.8692810457516353, "naucs_at_5_std": 0.24042950513538955, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 0.8692810457516413, "naucs_at_10_std": 0.35807656395891135, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 0.8692810457516413, "naucs_at_20_std": 0.35807656395891135, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "tabfquad_test_subsampled": {"ndcg_at_1": 0.83929, "ndcg_at_3": 0.88605, "ndcg_at_5": 0.89666, "ndcg_at_10": 0.90146, "ndcg_at_20": 0.90988, "ndcg_at_100": 0.91198, "ndcg_at_1000": 0.91198, "map_at_1": 0.83929, "map_at_3": 0.875, "map_at_5": 0.88107, "map_at_10": 0.88315, "map_at_20": 0.88562, "map_at_100": 0.88594, "map_at_1000": 0.88594, "recall_at_1": 0.83929, "recall_at_3": 0.91786, "recall_at_5": 0.94286, "recall_at_10": 0.95714, "recall_at_20": 0.98929, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.83929, "precision_at_3": 0.30595, "precision_at_5": 0.18857, "precision_at_10": 0.09571, "precision_at_20": 0.04946, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.8321428571428572, "mrr_at_3": 0.8714285714285714, "mrr_at_5": 0.8775, "mrr_at_10": 0.8795833333333333, "mrr_at_20": 0.8820347540930248, "mrr_at_100": 0.8823579864162571, "mrr_at_1000": 0.8823579864162571, "naucs_at_1_max": 0.4780421259412858, "naucs_at_1_std": 0.21763614536723871, "naucs_at_1_diff1": 0.8584742988104332, "naucs_at_3_max": 0.5309341127755455, "naucs_at_3_std": 0.3166889944383569, "naucs_at_3_diff1": 0.7913774205334293, "naucs_at_5_max": 0.5380777310924367, "naucs_at_5_std": 0.37307422969187604, "naucs_at_5_diff1": 0.7886321195144746, "naucs_at_10_max": 0.5157563025210077, "naucs_at_10_std": 0.4667366946778735, "naucs_at_10_diff1": 0.7631107998755065, "naucs_at_20_max": 0.6640211640211727, "naucs_at_20_std": 0.7424525365701908, "naucs_at_20_diff1": 0.807812013694365, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}}
|