Markgazol's picture
Upload results.json
1f84016 verified
{"arxivqa_test_subsampled": {"ndcg_at_1": 0.89, "ndcg_at_3": 0.92076, "ndcg_at_5": 0.9304, "ndcg_at_10": 0.93171, "ndcg_at_20": 0.93634, "ndcg_at_100": 0.93869, "ndcg_at_1000": 0.93896, "map_at_1": 0.89, "map_at_3": 0.914, "map_at_5": 0.9192, "map_at_10": 0.91976, "map_at_20": 0.92107, "map_at_100": 0.92145, "map_at_1000": 0.92146, "recall_at_1": 0.89, "recall_at_3": 0.94, "recall_at_5": 0.964, "recall_at_10": 0.968, "recall_at_20": 0.986, "recall_at_100": 0.998, "recall_at_1000": 1.0, "precision_at_1": 0.89, "precision_at_3": 0.31333, "precision_at_5": 0.1928, "precision_at_10": 0.0968, "precision_at_20": 0.0493, "precision_at_100": 0.00998, "precision_at_1000": 0.001, "mrr_at_1": 0.894, "mrr_at_3": 0.916, "mrr_at_5": 0.9212999999999998, "mrr_at_10": 0.9218555555555554, "mrr_at_20": 0.9231820991873619, "mrr_at_100": 0.9235471865999493, "mrr_at_1000": 0.9235571368487056, "naucs_at_1_max": 0.774710076453912, "naucs_at_1_std": 0.00752512670732741, "naucs_at_1_diff1": 0.9401683704149125, "naucs_at_3_max": 0.8818860877684437, "naucs_at_3_std": 0.00991285403050398, "naucs_at_3_diff1": 0.9471677559912888, "naucs_at_5_max": 0.8817304699657652, "naucs_at_5_std": -0.17535532731611664, "naucs_at_5_diff1": 0.9627814088598367, "naucs_at_10_max": 0.8751167133520102, "naucs_at_10_std": -0.15134803921569462, "naucs_at_10_diff1": 0.9581290849673211, "naucs_at_20_max": 0.8896225156729255, "naucs_at_20_std": -0.14685874349740685, "naucs_at_20_diff1": 0.981325863678799, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "docvqa_test_subsampled": {"ndcg_at_1": 0.55432, "ndcg_at_3": 0.63048, "ndcg_at_5": 0.65127, "ndcg_at_10": 0.66756, "ndcg_at_20": 0.67849, "ndcg_at_100": 0.69527, "ndcg_at_1000": 0.70521, "map_at_1": 0.55432, "map_at_3": 0.6116, "map_at_5": 0.62302, "map_at_10": 0.62963, "map_at_20": 0.63248, "map_at_100": 0.63479, "map_at_1000": 0.63516, "recall_at_1": 0.55432, "recall_at_3": 0.68514, "recall_at_5": 0.73614, "recall_at_10": 0.78714, "recall_at_20": 0.83149, "recall_at_100": 0.92239, "recall_at_1000": 1.0, "precision_at_1": 0.55432, "precision_at_3": 0.22838, "precision_at_5": 0.14723, "precision_at_10": 0.07871, "precision_at_20": 0.04157, "precision_at_100": 0.00922, "precision_at_1000": 0.001, "mrr_at_1": 0.5521064301552107, "mrr_at_3": 0.6116038433111605, "mrr_at_5": 0.6222468588322246, "mrr_at_10": 0.628252912399254, "mrr_at_20": 0.6313297096559096, "mrr_at_100": 0.6335948649982139, "mrr_at_1000": 0.6340054408664814, "naucs_at_1_max": 0.5209116999344013, "naucs_at_1_std": 0.12484436707396533, "naucs_at_1_diff1": 0.8921265837288471, "naucs_at_3_max": 0.4729783728508365, "naucs_at_3_std": 0.1852246991093841, "naucs_at_3_diff1": 0.8113189012837098, "naucs_at_5_max": 0.4227426188801733, "naucs_at_5_std": 0.29772795869845586, "naucs_at_5_diff1": 0.790277015336412, "naucs_at_10_max": 0.3507777944555388, "naucs_at_10_std": 0.4290376458730776, "naucs_at_10_diff1": 0.7834648082247443, "naucs_at_20_max": 0.2764677767612935, "naucs_at_20_std": 0.5247195182225136, "naucs_at_20_diff1": 0.7743772789808456, "naucs_at_100_max": 0.1271737733453686, "naucs_at_100_std": 0.8546267750892371, "naucs_at_100_diff1": 0.801763032793754, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "infovqa_test_subsampled": {"ndcg_at_1": 0.89271, "ndcg_at_3": 0.92385, "ndcg_at_5": 0.93152, "ndcg_at_10": 0.93535, "ndcg_at_20": 0.93692, "ndcg_at_100": 0.93955, "ndcg_at_1000": 0.94033, "map_at_1": 0.89271, "map_at_3": 0.917, "map_at_5": 0.92136, "map_at_10": 0.92288, "map_at_20": 0.92333, "map_at_100": 0.92369, "map_at_1000": 0.92372, "recall_at_1": 0.89271, "recall_at_3": 0.94332, "recall_at_5": 0.96154, "recall_at_10": 0.97368, "recall_at_20": 0.97976, "recall_at_100": 0.99393, "recall_at_1000": 1.0, "precision_at_1": 0.89271, "precision_at_3": 0.31444, "precision_at_5": 0.19231, "precision_at_10": 0.09737, "precision_at_20": 0.04899, "precision_at_100": 0.00994, "precision_at_1000": 0.001, "mrr_at_1": 0.8927125506072875, "mrr_at_3": 0.9166666666666665, "mrr_at_5": 0.9210188933873145, "mrr_at_10": 0.922516226463595, "mrr_at_20": 0.9229463884069148, "mrr_at_100": 0.9233103110716202, "mrr_at_1000": 0.9233371050974674, "naucs_at_1_max": 0.630222668714959, "naucs_at_1_std": -0.17597703721563485, "naucs_at_1_diff1": 0.9479386032063167, "naucs_at_3_max": 0.7670268743790779, "naucs_at_3_std": -0.08858705654515041, "naucs_at_3_diff1": 0.9362577494333106, "naucs_at_5_max": 0.8423430537170873, "naucs_at_5_std": 0.07105495946203985, "naucs_at_5_diff1": 0.9363959607452695, "naucs_at_10_max": 0.8223529961696429, "naucs_at_10_std": 0.1964949763377099, "naucs_at_10_diff1": 0.9170865160173047, "naucs_at_20_max": 0.8861454756035069, "naucs_at_20_std": 0.39922255218911656, "naucs_at_20_diff1": 0.9199859445982925, "naucs_at_100_max": 0.9564661819784259, "naucs_at_100_std": 0.39174816052979483, "naucs_at_100_diff1": 0.9564661819784259, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "tabfquad_test_subsampled": {"ndcg_at_1": 0.86786, "ndcg_at_3": 0.89218, "ndcg_at_5": 0.89971, "ndcg_at_10": 0.90407, "ndcg_at_20": 0.91161, "ndcg_at_100": 0.91795, "ndcg_at_1000": 0.91795, "map_at_1": 0.86786, "map_at_3": 0.8869, "map_at_5": 0.89119, "map_at_10": 0.89284, "map_at_20": 0.89506, "map_at_100": 0.89608, "map_at_1000": 0.89608, "recall_at_1": 0.86786, "recall_at_3": 0.90714, "recall_at_5": 0.925, "recall_at_10": 0.93929, "recall_at_20": 0.96786, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.86786, "precision_at_3": 0.30238, "precision_at_5": 0.185, "precision_at_10": 0.09393, "precision_at_20": 0.04839, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.8678571428571429, "mrr_at_3": 0.8880952380952382, "mrr_at_5": 0.8914880952380952, "mrr_at_10": 0.8936054421768708, "mrr_at_20": 0.8956120665049235, "mrr_at_100": 0.8966105262484262, "mrr_at_1000": 0.8966105262484262, "naucs_at_1_max": 0.6224620116245722, "naucs_at_1_std": 0.5163030729533189, "naucs_at_1_diff1": 0.83614825240441, "naucs_at_3_max": 0.6640271493212687, "naucs_at_3_std": 0.5782159017453128, "naucs_at_3_diff1": 0.851325145442794, "naucs_at_5_max": 0.638255302120847, "naucs_at_5_std": 0.5640478413587644, "naucs_at_5_diff1": 0.8353785958827951, "naucs_at_10_max": 0.7831053990223564, "naucs_at_10_std": 0.6937441643324004, "naucs_at_10_diff1": 0.8555500631625198, "naucs_at_20_max": 0.8596846145865782, "naucs_at_20_std": 0.767143894594877, "naucs_at_20_diff1": 0.8306359580869404, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "tatdqa_test": {"ndcg_at_1": 0.69502, "ndcg_at_3": 0.78429, "ndcg_at_5": 0.80689, "ndcg_at_10": 0.82146, "ndcg_at_20": 0.82583, "ndcg_at_100": 0.8309, "ndcg_at_1000": 0.83205, "map_at_1": 0.69502, "map_at_3": 0.76225, "map_at_5": 0.77498, "map_at_10": 0.78113, "map_at_20": 0.78236, "map_at_100": 0.78301, "map_at_1000": 0.78307, "recall_at_1": 0.69502, "recall_at_3": 0.84812, "recall_at_5": 0.90219, "recall_at_10": 0.94654, "recall_at_20": 0.96355, "recall_at_100": 0.99149, "recall_at_1000": 1.0, "precision_at_1": 0.69502, "precision_at_3": 0.28271, "precision_at_5": 0.18044, "precision_at_10": 0.09465, "precision_at_20": 0.04818, "precision_at_100": 0.00991, "precision_at_1000": 0.001, "mrr_at_1": 0.6883353584447145, "mrr_at_3": 0.7605305791818548, "mrr_at_5": 0.7727116241393278, "mrr_at_10": 0.7791292985399909, "mrr_at_20": 0.7801871126234015, "mrr_at_100": 0.7808254346817661, "mrr_at_1000": 0.7808836879847301, "naucs_at_1_max": 0.23990631588559144, "naucs_at_1_std": -0.2053649772556745, "naucs_at_1_diff1": 0.8161613217793823, "naucs_at_3_max": 0.26064316805130433, "naucs_at_3_std": -0.1845930509882535, "naucs_at_3_diff1": 0.7281548848493787, "naucs_at_5_max": 0.30328170182306735, "naucs_at_5_std": -0.07060955382534272, "naucs_at_5_diff1": 0.6809803738586584, "naucs_at_10_max": 0.39930135459761296, "naucs_at_10_std": 0.15329956032047617, "naucs_at_10_diff1": 0.6497620104605146, "naucs_at_20_max": 0.4778187578829847, "naucs_at_20_std": 0.2799110282451862, "naucs_at_20_diff1": 0.6693088691205222, "naucs_at_100_max": 0.44210430745767804, "naucs_at_100_std": 0.3792107382178212, "naucs_at_100_diff1": 0.6891173738798902, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "shiftproject_test": {"ndcg_at_1": 0.74, "ndcg_at_3": 0.84833, "ndcg_at_5": 0.86512, "ndcg_at_10": 0.8754, "ndcg_at_20": 0.8754, "ndcg_at_100": 0.87714, "ndcg_at_1000": 0.87714, "map_at_1": 0.74, "map_at_3": 0.82333, "map_at_5": 0.83283, "map_at_10": 0.83742, "map_at_20": 0.83742, "map_at_100": 0.83761, "map_at_1000": 0.83761, "recall_at_1": 0.74, "recall_at_3": 0.92, "recall_at_5": 0.96, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.74, "precision_at_3": 0.30667, "precision_at_5": 0.192, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.74, "mrr_at_3": 0.83, "mrr_at_5": 0.8370000000000001, "mrr_at_10": 0.8413452380952381, "mrr_at_20": 0.8413452380952381, "mrr_at_100": 0.8415413165266107, "mrr_at_1000": 0.8415413165266107, "naucs_at_1_max": 0.05362718462349462, "naucs_at_1_std": -0.40608653339280615, "naucs_at_1_diff1": 0.8296297798142814, "naucs_at_3_max": 0.44520308123249314, "naucs_at_3_std": -0.2791783380018683, "naucs_at_3_diff1": 0.7462068160597581, "naucs_at_5_max": 0.37640056022409174, "naucs_at_5_std": -0.10270774976656832, "naucs_at_5_diff1": 0.8231792717086805, "naucs_at_10_max": 0.5541549953314738, "naucs_at_10_std": 0.35807656395891135, "naucs_at_10_diff1": 0.8692810457516413, "naucs_at_20_max": 0.5541549953314738, "naucs_at_20_std": 0.35807656395891135, "naucs_at_20_diff1": 0.8692810457516413, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.99, "ndcg_at_3": 0.99, "ndcg_at_5": 0.99431, "ndcg_at_10": 0.99431, "ndcg_at_20": 0.99431, "ndcg_at_100": 0.99431, "ndcg_at_1000": 0.99431, "map_at_1": 0.99, "map_at_3": 0.99, "map_at_5": 0.9925, "map_at_10": 0.9925, "map_at_20": 0.9925, "map_at_100": 0.9925, "map_at_1000": 0.9925, "recall_at_1": 0.99, "recall_at_3": 0.99, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.99, "precision_at_3": 0.33, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.99, "mrr_at_3": 0.99, "mrr_at_5": 0.9925, "mrr_at_10": 0.9925, "mrr_at_20": 0.9925, "mrr_at_100": 0.9925, "mrr_at_1000": 0.9925, "naucs_at_1_max": 1.0, "naucs_at_1_std": 0.8692810457516276, "naucs_at_1_diff1": 0.8692810457516276, "naucs_at_3_max": 1.0, "naucs_at_3_std": 0.8692810457516356, "naucs_at_3_diff1": 0.8692810457516356, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_energy_test": {"ndcg_at_1": 0.94, "ndcg_at_3": 0.95631, "ndcg_at_5": 0.96062, "ndcg_at_10": 0.96062, "ndcg_at_20": 0.96332, "ndcg_at_100": 0.96527, "ndcg_at_1000": 0.96527, "map_at_1": 0.94, "map_at_3": 0.95167, "map_at_5": 0.95417, "map_at_10": 0.95417, "map_at_20": 0.955, "map_at_100": 0.95529, "map_at_1000": 0.95529, "recall_at_1": 0.94, "recall_at_3": 0.97, "recall_at_5": 0.98, "recall_at_10": 0.98, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.94, "precision_at_3": 0.32333, "precision_at_5": 0.196, "precision_at_10": 0.098, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.95, "mrr_at_3": 0.9566666666666666, "mrr_at_5": 0.9591666666666666, "mrr_at_10": 0.9591666666666666, "mrr_at_20": 0.9600757575757575, "mrr_at_100": 0.9603698752228164, "mrr_at_1000": 0.9603698752228164, "naucs_at_1_max": 0.7587924058512304, "naucs_at_1_std": -0.2661064425770283, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 0.7587924058512326, "naucs_at_3_std": -0.7791783380018689, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 0.7770774976657324, "naucs_at_5_std": -1.445845004668519, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 0.7770774976657324, "naucs_at_10_std": -1.445845004668519, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": -1.1517273576097316, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_government_reports_test": {"ndcg_at_1": 0.94, "ndcg_at_3": 0.97655, "ndcg_at_5": 0.97655, "ndcg_at_10": 0.97655, "ndcg_at_20": 0.97655, "ndcg_at_100": 0.97655, "ndcg_at_1000": 0.97655, "map_at_1": 0.94, "map_at_3": 0.96833, "map_at_5": 0.96833, "map_at_10": 0.96833, "map_at_20": 0.96833, "map_at_100": 0.96833, "map_at_1000": 0.96833, "recall_at_1": 0.94, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.94, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.95, "mrr_at_3": 0.975, "mrr_at_5": 0.975, "mrr_at_10": 0.975, "mrr_at_20": 0.975, "mrr_at_100": 0.975, "mrr_at_1000": 0.975, "naucs_at_1_max": 0.8330999066293182, "naucs_at_1_std": 0.27108621226268315, "naucs_at_1_diff1": 0.8821195144724544, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_healthcare_industry_test": {"ndcg_at_1": 0.97, "ndcg_at_3": 0.98762, "ndcg_at_5": 0.98762, "ndcg_at_10": 0.98762, "ndcg_at_20": 0.98762, "ndcg_at_100": 0.98762, "ndcg_at_1000": 0.98762, "map_at_1": 0.97, "map_at_3": 0.98333, "map_at_5": 0.98333, "map_at_10": 0.98333, "map_at_20": 0.98333, "map_at_100": 0.98333, "map_at_1000": 0.98333, "recall_at_1": 0.97, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.97, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.97, "mrr_at_3": 0.985, "mrr_at_5": 0.985, "mrr_at_10": 0.985, "mrr_at_20": 0.985, "mrr_at_100": 0.985, "mrr_at_1000": 0.985, "naucs_at_1_max": 0.6591970121381903, "naucs_at_1_std": -0.25630252100840467, "naucs_at_1_diff1": 0.9564270152505465, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}}