{ "metrics": { "syntheticDocQA_artificial_intelligence_test": { "ndcg_at_1": 0.97, "ndcg_at_3": 0.98893, "ndcg_at_5": 0.98893, "ndcg_at_10": 0.98893, "ndcg_at_20": 0.98893, "ndcg_at_50": 0.98893, "ndcg_at_100": 0.98893, "map_at_1": 0.97, "map_at_3": 0.985, "map_at_5": 0.985, "map_at_10": 0.985, "map_at_20": 0.985, "map_at_50": 0.985, "map_at_100": 0.985, "recall_at_1": 0.97, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.97, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.98, "mrr_at_3": 0.99, "mrr_at_5": 0.99, "mrr_at_10": 0.99, "mrr_at_20": 0.99, "mrr_at_50": 0.99, "mrr_at_100": 0.99, "naucs_at_1_max": 0.6661998132586402, "naucs_at_1_std": -0.2198879551820713, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "tabfquad_test_subsampled": { "ndcg_at_1": 0.86786, "ndcg_at_3": 0.91012, "ndcg_at_5": 0.92026, "ndcg_at_10": 0.92608, "ndcg_at_20": 0.92787, "ndcg_at_50": 0.93015, "ndcg_at_100": 0.93015, "map_at_1": 0.86786, "map_at_3": 0.9, "map_at_5": 0.90554, "map_at_10": 0.90797, "map_at_20": 0.90845, "map_at_50": 0.90888, "map_at_100": 0.90888, "recall_at_1": 0.86786, "recall_at_3": 0.93929, "recall_at_5": 0.96429, "recall_at_10": 0.98214, "recall_at_20": 0.98929, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.86786, "precision_at_3": 0.3131, "precision_at_5": 0.19286, "precision_at_10": 0.09821, "precision_at_20": 0.04946, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.8714285714285714, "mrr_at_3": 0.9017857142857143, "mrr_at_5": 0.9073214285714286, "mrr_at_10": 0.9097123015873015, "mrr_at_20": 0.9102083333333333, "mrr_at_50": 0.9106373626373626, "mrr_at_100": 0.9106373626373626, "naucs_at_1_max": 0.5190267677952406, "naucs_at_1_std": 0.20565850861417337, "naucs_at_1_diff1": 0.8928115307425651, "naucs_at_3_max": 0.6486790794749274, "naucs_at_3_std": 0.4622947218102944, "naucs_at_3_diff1": 0.9004503762289242, "naucs_at_5_max": 0.8830532212885142, "naucs_at_5_std": 0.6742296918767519, "naucs_at_5_diff1": 0.9460784313725463, "naucs_at_10_max": 0.8216619981325874, "naucs_at_10_std": 0.4857142857142915, "naucs_at_10_diff1": 0.9183006535947714, "naucs_at_20_max": 1.0, "naucs_at_20_std": 0.9564270152505505, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": 1.0, "naucs_at_50_std": 1.0, "naucs_at_50_diff1": 1.0, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0 }, "shiftproject_test": { "ndcg_at_1": 0.76, "ndcg_at_3": 0.87833, "ndcg_at_5": 0.88651, "ndcg_at_10": 0.8894, "ndcg_at_20": 0.8894, "ndcg_at_50": 0.89133, "ndcg_at_100": 0.89133, "map_at_1": 0.76, "map_at_3": 0.85, "map_at_5": 0.8545, "map_at_10": 0.8555, "map_at_20": 0.8555, "map_at_50": 0.85579, "map_at_100": 0.85579, "recall_at_1": 0.76, "recall_at_3": 0.96, "recall_at_5": 0.98, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.76, "precision_at_3": 0.32, "precision_at_5": 0.196, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.77, "mrr_at_3": 0.86, "mrr_at_5": 0.8645, "mrr_at_10": 0.8656111111111112, "mrr_at_20": 0.8656111111111112, "mrr_at_50": 0.86590522875817, "mrr_at_100": 0.86590522875817, "naucs_at_1_max": 0.19752098295405382, "naucs_at_1_std": -0.1442415851864674, "naucs_at_1_diff1": 0.8336722332785322, "naucs_at_3_max": 0.19946311858076338, "naucs_at_3_std": -0.029178338001868868, "naucs_at_3_diff1": 0.8978758169934614, "naucs_at_5_max": 0.540149393090577, "naucs_at_5_std": 0.24042950513538955, "naucs_at_5_diff1": 0.861111111111116, "naucs_at_10_max": 0.7222222222222276, "naucs_at_10_std": 0.35807656395891135, "naucs_at_10_diff1": 0.7222222222222276, "naucs_at_20_max": 0.7222222222222276, "naucs_at_20_std": 0.35807656395891135, "naucs_at_20_diff1": 0.7222222222222276, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "syntheticDocQA_government_reports_test": { "ndcg_at_1": 0.92, "ndcg_at_3": 0.96286, "ndcg_at_5": 0.96286, "ndcg_at_10": 0.96286, "ndcg_at_20": 0.96565, "ndcg_at_50": 0.96565, "ndcg_at_100": 0.96565, "map_at_1": 0.92, "map_at_3": 0.95333, "map_at_5": 0.95333, "map_at_10": 0.95333, "map_at_20": 0.95424, "map_at_50": 0.95424, "map_at_100": 0.95424, "recall_at_1": 0.92, "recall_at_3": 0.99, "recall_at_5": 0.99, "recall_at_10": 0.99, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.92, "precision_at_3": 0.33, "precision_at_5": 0.198, "precision_at_10": 0.099, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.93, "mrr_at_3": 0.96, "mrr_at_5": 0.96, "mrr_at_10": 0.961, "mrr_at_20": 0.961, "mrr_at_50": 0.961, "mrr_at_100": 0.961, "naucs_at_1_max": 0.4417016806722691, "naucs_at_1_std": 0.09599673202614255, "naucs_at_1_diff1": 0.8999183006535953, "naucs_at_3_max": 1.0, "naucs_at_3_std": 0.12278244631183229, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 0.12278244631185926, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 0.12278244631185926, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "infovqa_test_subsampled": { "ndcg_at_1": 0.90283, "ndcg_at_3": 0.92859, "ndcg_at_5": 0.93539, "ndcg_at_10": 0.93877, "ndcg_at_20": 0.94183, "ndcg_at_50": 0.94346, "ndcg_at_100": 0.94378, "map_at_1": 0.90283, "map_at_3": 0.92274, "map_at_5": 0.92659, "map_at_10": 0.92804, "map_at_20": 0.92887, "map_at_50": 0.92915, "map_at_100": 0.92917, "recall_at_1": 0.90283, "recall_at_3": 0.94534, "recall_at_5": 0.96154, "recall_at_10": 0.97166, "recall_at_20": 0.98381, "recall_at_50": 0.9919, "recall_at_100": 0.99393, "precision_at_1": 0.90283, "precision_at_3": 0.31511, "precision_at_5": 0.19231, "precision_at_10": 0.09717, "precision_at_20": 0.04919, "precision_at_50": 0.01984, "precision_at_100": 0.00994, "mrr_at_1": 0.8967611336032388, "mrr_at_3": 0.918353576248313, "mrr_at_5": 0.9223009446693656, "mrr_at_10": 0.924116380695328, "mrr_at_20": 0.9248166923205465, "mrr_at_50": 0.9251925733928332, "mrr_at_100": 0.9252178770365579, "naucs_at_1_max": 0.7032099887107416, "naucs_at_1_std": 0.0019003151834775542, "naucs_at_1_diff1": 0.9557773557438912, "naucs_at_3_max": 0.8288721769160055, "naucs_at_3_std": 0.21068169315514182, "naucs_at_3_diff1": 0.9504045111146412, "naucs_at_5_max": 0.8647951304327424, "naucs_at_5_std": 0.5047446344096794, "naucs_at_5_diff1": 0.9570172429660248, "naucs_at_10_max": 0.9308492405555852, "naucs_at_10_std": 0.6471514357346168, "naucs_at_10_diff1": 0.9603236088917129, "naucs_at_20_max": 0.9673496364838108, "naucs_at_20_std": 0.9305663155604971, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": 0.9346992729676393, "naucs_at_50_std": 0.8611326311210196, "naucs_at_50_diff1": 1.0, "naucs_at_100_max": 0.9564661819784259, "naucs_at_100_std": 0.8148435081613579, "naucs_at_100_diff1": 1.0 }, "syntheticDocQA_healthcare_industry_test": { "ndcg_at_1": 0.99, "ndcg_at_3": 0.99631, "ndcg_at_5": 0.99631, "ndcg_at_10": 0.99631, "ndcg_at_20": 0.99631, "ndcg_at_50": 0.99631, "ndcg_at_100": 0.99631, "map_at_1": 0.99, "map_at_3": 0.995, "map_at_5": 0.995, "map_at_10": 0.995, "map_at_20": 0.995, "map_at_50": 0.995, "map_at_100": 0.995, "recall_at_1": 0.99, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.99, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.99, "mrr_at_3": 0.995, "mrr_at_5": 0.995, "mrr_at_10": 0.995, "mrr_at_20": 0.995, "mrr_at_50": 0.995, "mrr_at_100": 0.995, "naucs_at_1_max": 0.35807656395891085, "naucs_at_1_std": 0.7222222222222201, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "syntheticDocQA_energy_test": { "ndcg_at_1": 0.94, "ndcg_at_3": 0.95762, "ndcg_at_5": 0.95762, "ndcg_at_10": 0.96411, "ndcg_at_20": 0.96411, "ndcg_at_50": 0.96621, "ndcg_at_100": 0.96621, "map_at_1": 0.94, "map_at_3": 0.95333, "map_at_5": 0.95333, "map_at_10": 0.95601, "map_at_20": 0.95601, "map_at_50": 0.9564, "map_at_100": 0.9564, "recall_at_1": 0.94, "recall_at_3": 0.97, "recall_at_5": 0.97, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.94, "precision_at_3": 0.32333, "precision_at_5": 0.194, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.94, "mrr_at_3": 0.9533333333333333, "mrr_at_5": 0.9533333333333333, "mrr_at_10": 0.956190476190476, "mrr_at_20": 0.956190476190476, "mrr_at_50": 0.9566071428571428, "mrr_at_100": 0.9566071428571428, "naucs_at_1_max": 0.5553999377528803, "naucs_at_1_std": -0.8576097105508842, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 0.7424525365701778, "naucs_at_3_std": -1.21708683473389, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 0.742452536570183, "naucs_at_5_std": -1.2170868347338881, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 0.8692810457516413, "naucs_at_10_std": -1.7399626517273863, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 0.8692810457516413, "naucs_at_20_std": -1.7399626517273863, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "arxivqa_test_subsampled": { "ndcg_at_1": 0.844, "ndcg_at_3": 0.89107, "ndcg_at_5": 0.90114, "ndcg_at_10": 0.90777, "ndcg_at_20": 0.91077, "ndcg_at_50": 0.91363, "ndcg_at_100": 0.91429, "map_at_1": 0.844, "map_at_3": 0.87967, "map_at_5": 0.88537, "map_at_10": 0.88818, "map_at_20": 0.88899, "map_at_50": 0.88948, "map_at_100": 0.88954, "recall_at_1": 0.844, "recall_at_3": 0.924, "recall_at_5": 0.948, "recall_at_10": 0.968, "recall_at_20": 0.98, "recall_at_50": 0.994, "recall_at_100": 0.998, "precision_at_1": 0.844, "precision_at_3": 0.308, "precision_at_5": 0.1896, "precision_at_10": 0.0968, "precision_at_20": 0.049, "precision_at_50": 0.01988, "precision_at_100": 0.00998, "mrr_at_1": 0.844, "mrr_at_3": 0.8806666666666665, "mrr_at_5": 0.8857666666666666, "mrr_at_10": 0.888461111111111, "mrr_at_20": 0.8893815257841571, "mrr_at_50": 0.8897941261308093, "mrr_at_100": 0.889852538829222, "naucs_at_1_max": 0.8003780552136457, "naucs_at_1_std": -0.21850586550779852, "naucs_at_1_diff1": 0.9469027104423614, "naucs_at_3_max": 0.8196594427244622, "naucs_at_3_std": -0.19168018084426927, "naucs_at_3_diff1": 0.9294805641554886, "naucs_at_5_max": 0.873770020828847, "naucs_at_5_std": -0.24251238957121693, "naucs_at_5_diff1": 0.9277275012569133, "naucs_at_10_max": 0.894228524743232, "naucs_at_10_std": -0.20824579831933768, "naucs_at_10_diff1": 0.9581290849673211, "naucs_at_20_max": 0.9330065359477141, "naucs_at_20_std": 0.06372549019607167, "naucs_at_20_diff1": 0.9460784313725465, "naucs_at_50_max": 0.9074074074073771, "naucs_at_50_std": 0.4609399315281356, "naucs_at_50_diff1": 0.9128540305010608, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0 }, "tatdqa_test": { "ndcg_at_1": 0.70717, "ndcg_at_3": 0.79727, "ndcg_at_5": 0.81602, "ndcg_at_10": 0.83047, "ndcg_at_20": 0.83494, "ndcg_at_50": 0.83746, "ndcg_at_100": 0.83864, "map_at_1": 0.70717, "map_at_3": 0.77582, "map_at_5": 0.78636, "map_at_10": 0.79239, "map_at_20": 0.79362, "map_at_50": 0.79407, "map_at_100": 0.79418, "recall_at_1": 0.70717, "recall_at_3": 0.85905, "recall_at_5": 0.90401, "recall_at_10": 0.94836, "recall_at_20": 0.96598, "recall_at_50": 0.97813, "recall_at_100": 0.98542, "precision_at_1": 0.70717, "precision_at_3": 0.28635, "precision_at_5": 0.1808, "precision_at_10": 0.09484, "precision_at_20": 0.0483, "precision_at_50": 0.01956, "precision_at_100": 0.00985, "mrr_at_1": 0.7023086269744836, "mrr_at_3": 0.7742000810044553, "mrr_at_5": 0.7843458890238966, "mrr_at_10": 0.7906075816312756, "mrr_at_20": 0.7916419924247091, "mrr_at_50": 0.7921731327579793, "mrr_at_100": 0.7922646391503263, "naucs_at_1_max": 0.2906046127459578, "naucs_at_1_std": -0.09142586663565208, "naucs_at_1_diff1": 0.8309895120853787, "naucs_at_3_max": 0.3796718885973771, "naucs_at_3_std": 0.10482653238007278, "naucs_at_3_diff1": 0.7471424147638421, "naucs_at_5_max": 0.3864536158297513, "naucs_at_5_std": 0.1181445112698976, "naucs_at_5_diff1": 0.7150521389103268, "naucs_at_10_max": 0.4839255339546331, "naucs_at_10_std": 0.31833706007680107, "naucs_at_10_diff1": 0.7381822584022644, "naucs_at_20_max": 0.5400727292289017, "naucs_at_20_std": 0.4498780215876757, "naucs_at_20_diff1": 0.7079690151340217, "naucs_at_50_max": 0.5474120233459462, "naucs_at_50_std": 0.5368176937394507, "naucs_at_50_diff1": 0.6888921944972604, "naucs_at_100_max": 0.4582633436145468, "naucs_at_100_std": 0.418915162414328, "naucs_at_100_diff1": 0.6404117260785882 }, "docvqa_test_subsampled": { "ndcg_at_1": 0.52772, "ndcg_at_3": 0.59582, "ndcg_at_5": 0.61394, "ndcg_at_10": 0.63728, "ndcg_at_20": 0.6482, "ndcg_at_50": 0.65777, "ndcg_at_100": 0.66501, "map_at_1": 0.52772, "map_at_3": 0.57945, "map_at_5": 0.58943, "map_at_10": 0.5993, "map_at_20": 0.60214, "map_at_50": 0.60364, "map_at_100": 0.60429, "recall_at_1": 0.52772, "recall_at_3": 0.64302, "recall_at_5": 0.68736, "recall_at_10": 0.75831, "recall_at_20": 0.80266, "recall_at_50": 0.85144, "recall_at_100": 0.89579, "precision_at_1": 0.52772, "precision_at_3": 0.21434, "precision_at_5": 0.13747, "precision_at_10": 0.07583, "precision_at_20": 0.04013, "precision_at_50": 0.01703, "precision_at_100": 0.00896, "mrr_at_1": 0.5254988913525499, "mrr_at_3": 0.5728011825572803, "mrr_at_5": 0.584109386548411, "mrr_at_10": 0.5947524020694751, "mrr_at_20": 0.5975668878612459, "mrr_at_50": 0.5990890888004771, "mrr_at_100": 0.5996060790906476, "naucs_at_1_max": 0.09552367711502079, "naucs_at_1_std": 0.4562953212349874, "naucs_at_1_diff1": 0.9160946322832428, "naucs_at_3_max": -0.044115438920847336, "naucs_at_3_std": 0.5742733170326745, "naucs_at_3_diff1": 0.8655963522117655, "naucs_at_5_max": -0.10576767278069941, "naucs_at_5_std": 0.6507173054255285, "naucs_at_5_diff1": 0.8353615090128605, "naucs_at_10_max": -0.2764424505277048, "naucs_at_10_std": 0.7587119579895203, "naucs_at_10_diff1": 0.8510524017981272, "naucs_at_20_max": -0.43743463755515927, "naucs_at_20_std": 0.7826668175451623, "naucs_at_20_diff1": 0.8554676478694768, "naucs_at_50_max": -0.4574288492202119, "naucs_at_50_std": 0.8851253371587365, "naucs_at_50_diff1": 0.8465829220076285, "naucs_at_100_max": -0.5265892443150121, "naucs_at_100_std": 0.8821668553153678, "naucs_at_100_diff1": 0.8424554142175609 } }