File size: 16,211 Bytes
69c902f
1
{"tatdqa_test": {"ndcg_at_1": 0.53949, "ndcg_at_3": 0.66215, "ndcg_at_5": 0.69165, "ndcg_at_10": 0.71631, "ndcg_at_20": 0.72958, "ndcg_at_100": 0.73809, "ndcg_at_1000": 0.73951, "map_at_1": 0.53949, "map_at_3": 0.63264, "map_at_5": 0.64914, "map_at_10": 0.65937, "map_at_20": 0.66313, "map_at_100": 0.66447, "map_at_1000": 0.66453, "recall_at_1": 0.53949, "recall_at_3": 0.74727, "recall_at_5": 0.81835, "recall_at_10": 0.89429, "recall_at_20": 0.94593, "recall_at_100": 0.98967, "recall_at_1000": 1.0, "precision_at_1": 0.53949, "precision_at_3": 0.24909, "precision_at_5": 0.16367, "precision_at_10": 0.08943, "precision_at_20": 0.0473, "precision_at_100": 0.0099, "precision_at_1000": 0.001, "mrr_at_1": 0.5449574726609964, "mrr_at_3": 0.6347711624139327, "mrr_at_5": 0.6512656946132037, "mrr_at_10": 0.6619129587841618, "mrr_at_20": 0.6656992491084615, "mrr_at_100": 0.667012232750466, "mrr_at_1000": 0.6670791140108373, "naucs_at_1_max": -0.0334734112615305, "naucs_at_1_std": -0.20884288458418684, "naucs_at_1_diff1": 0.7337451318520049, "naucs_at_3_max": -0.08800306907838781, "naucs_at_3_std": -0.18671535880941095, "naucs_at_3_diff1": 0.6055314422160362, "naucs_at_5_max": -0.06421279169560966, "naucs_at_5_std": -0.17447534263241596, "naucs_at_5_diff1": 0.5890832895428815, "naucs_at_10_max": 0.05473124292571762, "naucs_at_10_std": -0.05782565218238013, "naucs_at_10_diff1": 0.5132330847781013, "naucs_at_20_max": 0.16949349894874327, "naucs_at_20_std": 0.19657303631291392, "naucs_at_20_diff1": 0.5202353204000343, "naucs_at_100_max": 0.1476746438547578, "naucs_at_100_std": 0.8927449198480922, "naucs_at_100_diff1": 0.5113784217674133, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "shiftproject_test": {"ndcg_at_1": 0.64, "ndcg_at_3": 0.78988, "ndcg_at_5": 0.80192, "ndcg_at_10": 0.82138, "ndcg_at_20": 0.82366, "ndcg_at_100": 0.82566, "ndcg_at_1000": 0.82566, "map_at_1": 0.64, "map_at_3": 0.755, "map_at_5": 0.7615, "map_at_10": 0.76956, "map_at_20": 0.77006, "map_at_100": 0.77039, "map_at_1000": 0.77039, "recall_at_1": 0.64, "recall_at_3": 0.89, "recall_at_5": 0.92, "recall_at_10": 0.98, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.64, "precision_at_3": 0.29667, "precision_at_5": 0.184, "precision_at_10": 0.098, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.66, "mrr_at_3": 0.7716666666666667, "mrr_at_5": 0.7776666666666666, "mrr_at_10": 0.7841746031746031, "mrr_at_20": 0.7847009189640768, "mrr_at_100": 0.7850234996092381, "mrr_at_1000": 0.7850234996092381, "naucs_at_1_max": 0.4629741984927832, "naucs_at_1_std": -0.17893409119938702, "naucs_at_1_diff1": 0.8092827947375144, "naucs_at_3_max": 0.4795120694098431, "naucs_at_3_std": 0.02830512842539425, "naucs_at_3_diff1": 0.7682759213125999, "naucs_at_5_max": 0.36490429505135485, "naucs_at_5_std": -0.015114379084964233, "naucs_at_5_diff1": 0.7572362278244639, "naucs_at_10_max": 0.7957516339869297, "naucs_at_10_std": 0.24042950513538955, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 0.7222222222222276, "naucs_at_20_std": 0.35807656395891135, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.97, "ndcg_at_3": 0.98762, "ndcg_at_5": 0.98762, "ndcg_at_10": 0.98762, "ndcg_at_20": 0.98762, "ndcg_at_100": 0.98762, "ndcg_at_1000": 0.98762, "map_at_1": 0.97, "map_at_3": 0.98333, "map_at_5": 0.98333, "map_at_10": 0.98333, "map_at_20": 0.98333, "map_at_100": 0.98333, "map_at_1000": 0.98333, "recall_at_1": 0.97, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.97, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.97, "mrr_at_3": 0.9833333333333333, "mrr_at_5": 0.9833333333333333, "mrr_at_10": 0.9833333333333333, "mrr_at_20": 0.9833333333333333, "mrr_at_100": 0.9833333333333333, "mrr_at_1000": 0.9833333333333333, "naucs_at_1_max": 0.516962340491753, "naucs_at_1_std": -0.7268907563025234, "naucs_at_1_diff1": 0.912854030501093, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_government_reports_test": {"ndcg_at_1": 0.92, "ndcg_at_3": 0.95786, "ndcg_at_5": 0.95786, "ndcg_at_10": 0.96075, "ndcg_at_20": 0.96354, "ndcg_at_100": 0.96354, "ndcg_at_1000": 0.96354, "map_at_1": 0.92, "map_at_3": 0.95, "map_at_5": 0.95, "map_at_10": 0.951, "map_at_20": 0.95191, "map_at_100": 0.95191, "map_at_1000": 0.95191, "recall_at_1": 0.92, "recall_at_3": 0.98, "recall_at_5": 0.98, "recall_at_10": 0.99, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.92, "precision_at_3": 0.32667, "precision_at_5": 0.196, "precision_at_10": 0.099, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.93, "mrr_at_3": 0.955, "mrr_at_5": 0.955, "mrr_at_10": 0.9561111111111111, "mrr_at_20": 0.9570202020202021, "mrr_at_100": 0.9570202020202021, "mrr_at_1000": 0.9570202020202021, "naucs_at_1_max": 0.6128618113912231, "naucs_at_1_std": -0.18464052287581692, "naucs_at_1_diff1": 0.9279295051353874, "naucs_at_3_max": 0.6790382819794457, "naucs_at_3_std": -0.9556489262371661, "naucs_at_3_diff1": 0.7770774976657261, "naucs_at_5_max": 0.6790382819794609, "naucs_at_5_std": -0.9556489262371616, "naucs_at_5_diff1": 0.7770774976657324, "naucs_at_10_max": 1.0, "naucs_at_10_std": -0.1713352007469681, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "arxivqa_test_subsampled": {"ndcg_at_1": 0.794, "ndcg_at_3": 0.84459, "ndcg_at_5": 0.85449, "ndcg_at_10": 0.86546, "ndcg_at_20": 0.8743, "ndcg_at_100": 0.87922, "ndcg_at_1000": 0.87947, "map_at_1": 0.794, "map_at_3": 0.83233, "map_at_5": 0.83783, "map_at_10": 0.84234, "map_at_20": 0.8449, "map_at_100": 0.84563, "map_at_1000": 0.84564, "recall_at_1": 0.794, "recall_at_3": 0.88, "recall_at_5": 0.904, "recall_at_10": 0.938, "recall_at_20": 0.972, "recall_at_100": 0.998, "recall_at_1000": 1.0, "precision_at_1": 0.794, "precision_at_3": 0.29333, "precision_at_5": 0.1808, "precision_at_10": 0.0938, "precision_at_20": 0.0486, "precision_at_100": 0.00998, "precision_at_1000": 0.001, "mrr_at_1": 0.8, "mrr_at_3": 0.835, "mrr_at_5": 0.8406, "mrr_at_10": 0.8454952380952381, "mrr_at_20": 0.8477195774383856, "mrr_at_100": 0.8484620608713013, "mrr_at_1000": 0.8484709104288234, "naucs_at_1_max": 0.0921885755293141, "naucs_at_1_std": -0.35980101639808415, "naucs_at_1_diff1": 0.8887966764466101, "naucs_at_3_max": 0.03956411072224086, "naucs_at_3_std": -0.46532771237670856, "naucs_at_3_diff1": 0.8454183900731786, "naucs_at_5_max": -0.04559601618425514, "naucs_at_5_std": -0.5235274665421772, "naucs_at_5_diff1": 0.824356131341426, "naucs_at_10_max": -0.17657600674678867, "naucs_at_10_std": -0.6375711574952612, "naucs_at_10_diff1": 0.8559681937291044, "naucs_at_20_max": -0.2462318260637645, "naucs_at_20_std": -0.44294384420435945, "naucs_at_20_diff1": 0.8421035080698966, "naucs_at_100_max": -1.739962651727529, "naucs_at_100_std": 0.86928104575168, "naucs_at_100_diff1": 0.86928104575168, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "docvqa_test_subsampled": {"ndcg_at_1": 0.48115, "ndcg_at_3": 0.54395, "ndcg_at_5": 0.56303, "ndcg_at_10": 0.59152, "ndcg_at_20": 0.60608, "ndcg_at_100": 0.63062, "ndcg_at_1000": 0.64237, "map_at_1": 0.48115, "map_at_3": 0.52882, "map_at_5": 0.53936, "map_at_10": 0.55142, "map_at_20": 0.5554, "map_at_100": 0.55876, "map_at_1000": 0.55927, "recall_at_1": 0.48115, "recall_at_3": 0.58758, "recall_at_5": 0.63415, "recall_at_10": 0.72062, "recall_at_20": 0.77827, "recall_at_100": 0.91131, "recall_at_1000": 1.0, "precision_at_1": 0.48115, "precision_at_3": 0.19586, "precision_at_5": 0.12683, "precision_at_10": 0.07206, "precision_at_20": 0.03891, "precision_at_100": 0.00911, "precision_at_1000": 0.001, "mrr_at_1": 0.4811529933481153, "mrr_at_3": 0.5266075388026608, "mrr_at_5": 0.5385809312638581, "mrr_at_10": 0.5498381022771267, "mrr_at_20": 0.5543275170175831, "mrr_at_100": 0.5577756981224631, "mrr_at_1000": 0.5582758328447689, "naucs_at_1_max": -0.527917263354622, "naucs_at_1_std": -0.1080812468189484, "naucs_at_1_diff1": 0.7227629939383107, "naucs_at_3_max": -0.6118911878969437, "naucs_at_3_std": -0.08634361823242345, "naucs_at_3_diff1": 0.6544583288339624, "naucs_at_5_max": -0.6147233798629437, "naucs_at_5_std": -0.05001480088913278, "naucs_at_5_diff1": 0.6274316175842508, "naucs_at_10_max": -0.5736822403348542, "naucs_at_10_std": 0.10290355781749885, "naucs_at_10_diff1": 0.5222611253262611, "naucs_at_20_max": -0.6590539872675643, "naucs_at_20_std": 0.15725472054681322, "naucs_at_20_diff1": 0.5023655950505916, "naucs_at_100_max": -0.6858625437863338, "naucs_at_100_std": 0.5461732756560037, "naucs_at_100_diff1": 0.4697645018740936, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "syntheticDocQA_healthcare_industry_test": {"ndcg_at_1": 0.92, "ndcg_at_3": 0.96786, "ndcg_at_5": 0.96786, "ndcg_at_10": 0.96786, "ndcg_at_20": 0.96786, "ndcg_at_100": 0.96786, "ndcg_at_1000": 0.96786, "map_at_1": 0.92, "map_at_3": 0.95667, "map_at_5": 0.95667, "map_at_10": 0.95667, "map_at_20": 0.95667, "map_at_100": 0.95667, "map_at_1000": 0.95667, "recall_at_1": 0.92, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.92, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.92, "mrr_at_3": 0.9566666666666667, "mrr_at_5": 0.9566666666666667, "mrr_at_10": 0.9566666666666667, "mrr_at_20": 0.9566666666666667, "mrr_at_100": 0.9566666666666667, "mrr_at_1000": 0.9566666666666667, "naucs_at_1_max": 0.5962885154061613, "naucs_at_1_std": -0.4017273576097105, "naucs_at_1_diff1": 0.8999183006535953, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "infovqa_test_subsampled": {"ndcg_at_1": 0.81174, "ndcg_at_3": 0.86652, "ndcg_at_5": 0.87828, "ndcg_at_10": 0.88902, "ndcg_at_20": 0.89201, "ndcg_at_100": 0.89389, "ndcg_at_1000": 0.89549, "map_at_1": 0.81174, "map_at_3": 0.85324, "map_at_5": 0.85982, "map_at_10": 0.8644, "map_at_20": 0.86518, "map_at_100": 0.86545, "map_at_1000": 0.86552, "recall_at_1": 0.81174, "recall_at_3": 0.90486, "recall_at_5": 0.9332, "recall_at_10": 0.96559, "recall_at_20": 0.97773, "recall_at_100": 0.98785, "recall_at_1000": 1.0, "precision_at_1": 0.81174, "precision_at_3": 0.30162, "precision_at_5": 0.18664, "precision_at_10": 0.09656, "precision_at_20": 0.04889, "precision_at_100": 0.00988, "precision_at_1000": 0.001, "mrr_at_1": 0.8097165991902834, "mrr_at_3": 0.8522267206477733, "mrr_at_5": 0.8592105263157894, "mrr_at_10": 0.8637250176723861, "mrr_at_20": 0.8644974364130554, "mrr_at_100": 0.8647625223289692, "mrr_at_1000": 0.8648326023525785, "naucs_at_1_max": 0.16357820505380594, "naucs_at_1_std": -0.10491602700149766, "naucs_at_1_diff1": 0.8993894621193739, "naucs_at_3_max": 0.09251495457436099, "naucs_at_3_std": -0.1165161855271182, "naucs_at_3_diff1": 0.8399006870475763, "naucs_at_5_max": 0.10562338405694693, "naucs_at_5_std": 0.023309738467444545, "naucs_at_5_diff1": 0.8349812689239322, "naucs_at_10_max": -0.046794544240869776, "naucs_at_10_std": 0.5491444567412297, "naucs_at_10_diff1": 0.8697672939881426, "naucs_at_20_max": -0.14843223163245256, "naucs_at_20_std": 0.8317704578156119, "naucs_at_20_diff1": 0.834349850908432, "naucs_at_100_max": -0.4646668222236812, "naucs_at_100_std": 0.8031690226539966, "naucs_at_100_diff1": 0.882182045984622, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "syntheticDocQA_energy_test": {"ndcg_at_1": 0.87, "ndcg_at_3": 0.92417, "ndcg_at_5": 0.92417, "ndcg_at_10": 0.93129, "ndcg_at_20": 0.93379, "ndcg_at_100": 0.93585, "ndcg_at_1000": 0.93585, "map_at_1": 0.87, "map_at_3": 0.91167, "map_at_5": 0.91167, "map_at_10": 0.915, "map_at_20": 0.91567, "map_at_100": 0.91602, "map_at_1000": 0.91602, "recall_at_1": 0.87, "recall_at_3": 0.96, "recall_at_5": 0.96, "recall_at_10": 0.98, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.87, "precision_at_3": 0.32, "precision_at_5": 0.192, "precision_at_10": 0.098, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.87, "mrr_at_3": 0.9116666666666667, "mrr_at_5": 0.9136666666666666, "mrr_at_10": 0.9153333333333333, "mrr_at_20": 0.916047619047619, "mrr_at_100": 0.9164047619047619, "mrr_at_1000": 0.9164047619047619, "naucs_at_1_max": 0.403836184551581, "naucs_at_1_std": -0.3661778863956149, "naucs_at_1_diff1": 0.9072428349255718, "naucs_at_3_max": 0.324463118580764, "naucs_at_3_std": -0.5880018674136294, "naucs_at_3_diff1": 0.9346405228758151, "naucs_at_5_max": 0.3244631185807679, "naucs_at_5_std": -0.5880018674136228, "naucs_at_5_diff1": 0.9346405228758139, "naucs_at_10_max": -0.22035480859009696, "naucs_at_10_std": -0.9556489262371534, "naucs_at_10_diff1": 0.9346405228758136, "naucs_at_20_max": -0.5634920634920767, "naucs_at_20_std": -1.7399626517273863, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "tabfquad_test_subsampled": {"ndcg_at_1": 0.89286, "ndcg_at_3": 0.93333, "ndcg_at_5": 0.93933, "ndcg_at_10": 0.94261, "ndcg_at_20": 0.94516, "ndcg_at_100": 0.94576, "ndcg_at_1000": 0.94576, "map_at_1": 0.89286, "map_at_3": 0.92381, "map_at_5": 0.9272, "map_at_10": 0.92844, "map_at_20": 0.92906, "map_at_100": 0.92912, "map_at_1000": 0.92912, "recall_at_1": 0.89286, "recall_at_3": 0.96071, "recall_at_5": 0.975, "recall_at_10": 0.98571, "recall_at_20": 0.99643, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.89286, "precision_at_3": 0.32024, "precision_at_5": 0.195, "precision_at_10": 0.09857, "precision_at_20": 0.04982, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.8928571428571429, "mrr_at_3": 0.9238095238095239, "mrr_at_5": 0.927202380952381, "mrr_at_10": 0.9284920634920635, "mrr_at_20": 0.929124149659864, "mrr_at_100": 0.9291826976692317, "mrr_at_1000": 0.9291826976692317, "naucs_at_1_max": 0.6610675039246472, "naucs_at_1_std": -0.19158555729984375, "naucs_at_1_diff1": 0.8372841444270012, "naucs_at_3_max": 0.8222137339784428, "naucs_at_3_std": -0.227527374586203, "naucs_at_3_diff1": 0.8885918003565113, "naucs_at_5_max": 0.9019607843137233, "naucs_at_5_std": 0.0008670134720625009, "naucs_at_5_diff1": 0.9042950513538675, "naucs_at_10_max": 0.8978758169934754, "naucs_at_10_std": 0.09570494864613299, "naucs_at_10_diff1": 0.9346405228758147, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 0.8692810457516478, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}}