File size: 15,972 Bytes
e90a9b2 |
1 |
{"tatdqa_test": {"ndcg_at_1": 0.63609, "ndcg_at_3": 0.74203, "ndcg_at_5": 0.768, "ndcg_at_10": 0.78607, "ndcg_at_20": 0.79225, "ndcg_at_100": 0.79794, "ndcg_at_1000": 0.7999, "map_at_1": 0.63609, "map_at_3": 0.71699, "map_at_5": 0.73151, "map_at_10": 0.73908, "map_at_20": 0.74079, "map_at_100": 0.74164, "map_at_1000": 0.74174, "recall_at_1": 0.63609, "recall_at_3": 0.81409, "recall_at_5": 0.87667, "recall_at_10": 0.93196, "recall_at_20": 0.95626, "recall_at_100": 0.98603, "recall_at_1000": 1.0, "precision_at_1": 0.63609, "precision_at_3": 0.27136, "precision_at_5": 0.17533, "precision_at_10": 0.0932, "precision_at_20": 0.04781, "precision_at_100": 0.00986, "precision_at_1000": 0.001, "mrr_at_1": 0.6385176184690158, "mrr_at_3": 0.7178007290400972, "mrr_at_5": 0.7320473876063184, "mrr_at_10": 0.7396892900538101, "mrr_at_20": 0.7415524953882554, "mrr_at_100": 0.7423961819039153, "mrr_at_1000": 0.7424935268539138, "naucs_at_1_max": 0.15885418581164265, "naucs_at_1_std": -0.24722089872760156, "naucs_at_1_diff1": 0.8058133847238692, "naucs_at_3_max": 0.1716690156858292, "naucs_at_3_std": -0.23136832603804022, "naucs_at_3_diff1": 0.7036045863253637, "naucs_at_5_max": 0.23427257157466833, "naucs_at_5_std": -0.14847651919138252, "naucs_at_5_diff1": 0.6841422539102495, "naucs_at_10_max": 0.3098360565704996, "naucs_at_10_std": 0.086527322442512, "naucs_at_10_diff1": 0.6375151476027249, "naucs_at_20_max": 0.35048069176464824, "naucs_at_20_std": 0.20141013843756972, "naucs_at_20_diff1": 0.6104227641459228, "naucs_at_100_max": 0.38053080427833685, "naucs_at_100_std": 0.4793427449913261, "naucs_at_100_diff1": 0.4570863373115207, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "shiftproject_test": {"ndcg_at_1": 0.75, "ndcg_at_3": 0.85202, "ndcg_at_5": 0.87355, "ndcg_at_10": 0.87972, "ndcg_at_20": 0.87972, "ndcg_at_100": 0.8816, "ndcg_at_1000": 0.8816, "map_at_1": 0.75, "map_at_3": 0.82833, "map_at_5": 0.84083, "map_at_10": 0.84319, "map_at_20": 0.84319, "map_at_100": 0.84345, "map_at_1000": 0.84345, "recall_at_1": 0.75, "recall_at_3": 0.92, "recall_at_5": 0.97, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.75, "precision_at_3": 0.30667, "precision_at_5": 0.194, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.75, "mrr_at_3": 0.8333333333333333, "mrr_at_5": 0.8428333333333333, "mrr_at_10": 0.8455119047619047, "mrr_at_20": 0.8455119047619047, "mrr_at_100": 0.8457619047619047, "mrr_at_1000": 0.8457619047619047, "naucs_at_1_max": -0.1358032443746735, "naucs_at_1_std": -0.3977394034536896, "naucs_at_1_diff1": 0.6171847200418629, "naucs_at_3_max": 0.15429505135387495, "naucs_at_3_std": -0.00840336134453759, "naucs_at_3_diff1": 0.40581232492997227, "naucs_at_5_max": -0.20572673513849474, "naucs_at_5_std": -0.4344849050731363, "naucs_at_5_diff1": 0.6374105197634568, "naucs_at_10_max": -1.7399626517273863, "naucs_at_10_std": -1.7399626517273863, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": -1.7399626517273863, "naucs_at_20_std": -1.7399626517273863, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.98, "ndcg_at_3": 0.985, "ndcg_at_5": 0.98931, "ndcg_at_10": 0.98931, "ndcg_at_20": 0.98931, "ndcg_at_100": 0.98931, "ndcg_at_1000": 0.98931, "map_at_1": 0.98, "map_at_3": 0.98333, "map_at_5": 0.98583, "map_at_10": 0.98583, "map_at_20": 0.98583, "map_at_100": 0.98583, "map_at_1000": 0.98583, "recall_at_1": 0.98, "recall_at_3": 0.99, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.98, "precision_at_3": 0.33, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.98, "mrr_at_3": 0.985, "mrr_at_5": 0.9875, "mrr_at_10": 0.9875, "mrr_at_20": 0.9875, "mrr_at_100": 0.9875, "mrr_at_1000": 0.9875, "naucs_at_1_max": 0.8611111111111092, "naucs_at_1_std": -0.367413632119516, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 1.0, "naucs_at_3_std": -0.1713352007469878, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "syntheticDocQA_government_reports_test": {"ndcg_at_1": 0.85, "ndcg_at_3": 0.9294, "ndcg_at_5": 0.93371, "ndcg_at_10": 0.93672, "ndcg_at_20": 0.93672, "ndcg_at_100": 0.93672, "ndcg_at_1000": 0.93672, "map_at_1": 0.85, "map_at_3": 0.91167, "map_at_5": 0.91417, "map_at_10": 0.91528, "map_at_20": 0.91528, "map_at_100": 0.91528, "map_at_1000": 0.91528, "recall_at_1": 0.85, "recall_at_3": 0.98, "recall_at_5": 0.99, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.85, "precision_at_3": 0.32667, "precision_at_5": 0.198, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.86, "mrr_at_3": 0.9166666666666667, "mrr_at_5": 0.9191666666666667, "mrr_at_10": 0.9204166666666667, "mrr_at_20": 0.9204166666666667, "mrr_at_100": 0.9204166666666667, "mrr_at_1000": 0.9204166666666667, "naucs_at_1_max": 0.6291246339082338, "naucs_at_1_std": 0.3065408395704526, "naucs_at_1_diff1": 0.8960299381711682, "naucs_at_3_max": 1.0, "naucs_at_3_std": 0.5401493930905577, "naucs_at_3_diff1": 0.9346405228758099, "naucs_at_5_max": 1.0, "naucs_at_5_std": 0.7222222222222276, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "arxivqa_test_subsampled": {"ndcg_at_1": 0.794, "ndcg_at_3": 0.85059, "ndcg_at_5": 0.86058, "ndcg_at_10": 0.86646, "ndcg_at_20": 0.87266, "ndcg_at_100": 0.87945, "ndcg_at_1000": 0.88028, "map_at_1": 0.794, "map_at_3": 0.83633, "map_at_5": 0.84193, "map_at_10": 0.84439, "map_at_20": 0.84617, "map_at_100": 0.84714, "map_at_1000": 0.84718, "recall_at_1": 0.794, "recall_at_3": 0.892, "recall_at_5": 0.916, "recall_at_10": 0.934, "recall_at_20": 0.958, "recall_at_100": 0.994, "recall_at_1000": 1.0, "precision_at_1": 0.794, "precision_at_3": 0.29733, "precision_at_5": 0.1832, "precision_at_10": 0.0934, "precision_at_20": 0.0479, "precision_at_100": 0.00994, "precision_at_1000": 0.001, "mrr_at_1": 0.79, "mrr_at_3": 0.8376666666666667, "mrr_at_5": 0.8412666666666666, "mrr_at_10": 0.8437269841269841, "mrr_at_20": 0.8457550827827144, "mrr_at_100": 0.846530343562146, "mrr_at_1000": 0.8465733155735391, "naucs_at_1_max": 0.7325193686810741, "naucs_at_1_std": 0.14078724474104354, "naucs_at_1_diff1": 0.9224520481739498, "naucs_at_3_max": 0.7665170932709435, "naucs_at_3_std": 0.14292698257499137, "naucs_at_3_diff1": 0.8843279673150137, "naucs_at_5_max": 0.7890600684718345, "naucs_at_5_std": 0.14575830332132672, "naucs_at_5_diff1": 0.8787848472722453, "naucs_at_10_max": 0.7557932263814637, "naucs_at_10_std": 0.08308575955634576, "naucs_at_10_diff1": 0.9008290184760781, "naucs_at_20_max": 0.8719487795118063, "naucs_at_20_std": 0.3432484104753018, "naucs_at_20_diff1": 0.914965986394557, "naucs_at_100_max": 0.9564270152505304, "naucs_at_100_std": 0.9564270152505304, "naucs_at_100_diff1": 0.9074074074073771, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "docvqa_test_subsampled": {"ndcg_at_1": 0.50111, "ndcg_at_3": 0.57079, "ndcg_at_5": 0.5935, "ndcg_at_10": 0.60991, "ndcg_at_20": 0.62193, "ndcg_at_100": 0.64089, "ndcg_at_1000": 0.65609, "map_at_1": 0.50111, "map_at_3": 0.55285, "map_at_5": 0.56537, "map_at_10": 0.57209, "map_at_20": 0.57552, "map_at_100": 0.578, "map_at_1000": 0.57855, "recall_at_1": 0.50111, "recall_at_3": 0.62306, "recall_at_5": 0.67849, "recall_at_10": 0.72949, "recall_at_20": 0.77605, "recall_at_100": 0.88027, "recall_at_1000": 1.0, "precision_at_1": 0.50111, "precision_at_3": 0.20769, "precision_at_5": 0.1357, "precision_at_10": 0.07295, "precision_at_20": 0.0388, "precision_at_100": 0.0088, "precision_at_1000": 0.001, "mrr_at_1": 0.5033259423503326, "mrr_at_3": 0.5543237250554324, "mrr_at_5": 0.5655210643015521, "mrr_at_10": 0.5733132720937598, "mrr_at_20": 0.5761961809258666, "mrr_at_100": 0.5788361852971281, "mrr_at_1000": 0.5793368852941103, "naucs_at_1_max": 0.45963973840308553, "naucs_at_1_std": 0.6437091894280429, "naucs_at_1_diff1": 0.8759915182195691, "naucs_at_3_max": 0.41721087400698254, "naucs_at_3_std": 0.7560034210706996, "naucs_at_3_diff1": 0.8333236691100351, "naucs_at_5_max": 0.3576379403501726, "naucs_at_5_std": 0.8142067757233517, "naucs_at_5_diff1": 0.8324148371504299, "naucs_at_10_max": 0.3077029630906093, "naucs_at_10_std": 0.8506353277336524, "naucs_at_10_diff1": 0.823879626321585, "naucs_at_20_max": 0.23370693314651628, "naucs_at_20_std": 0.883867635902675, "naucs_at_20_diff1": 0.8055657709900838, "naucs_at_100_max": 0.2482445957598351, "naucs_at_100_std": 0.9020390341179869, "naucs_at_100_diff1": 0.7987699160382137, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "syntheticDocQA_healthcare_industry_test": {"ndcg_at_1": 0.95, "ndcg_at_3": 0.98155, "ndcg_at_5": 0.98155, "ndcg_at_10": 0.98155, "ndcg_at_20": 0.98155, "ndcg_at_100": 0.98155, "ndcg_at_1000": 0.98155, "map_at_1": 0.95, "map_at_3": 0.975, "map_at_5": 0.975, "map_at_10": 0.975, "map_at_20": 0.975, "map_at_100": 0.975, "map_at_1000": 0.975, "recall_at_1": 0.95, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.95, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.95, "mrr_at_3": 0.975, "mrr_at_5": 0.975, "mrr_at_10": 0.975, "mrr_at_20": 0.975, "mrr_at_100": 0.975, "mrr_at_1000": 0.975, "naucs_at_1_max": 0.8888888888888874, "naucs_at_1_std": 0.43286647992530286, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "infovqa_test_subsampled": {"ndcg_at_1": 0.85628, "ndcg_at_3": 0.89784, "ndcg_at_5": 0.90708, "ndcg_at_10": 0.90959, "ndcg_at_20": 0.91531, "ndcg_at_100": 0.91797, "ndcg_at_1000": 0.91892, "map_at_1": 0.85628, "map_at_3": 0.88833, "map_at_5": 0.89349, "map_at_10": 0.89446, "map_at_20": 0.89608, "map_at_100": 0.89645, "map_at_1000": 0.89647, "recall_at_1": 0.85628, "recall_at_3": 0.9251, "recall_at_5": 0.94737, "recall_at_10": 0.95547, "recall_at_20": 0.97773, "recall_at_100": 0.9919, "recall_at_1000": 1.0, "precision_at_1": 0.85628, "precision_at_3": 0.30837, "precision_at_5": 0.18947, "precision_at_10": 0.09555, "precision_at_20": 0.04889, "precision_at_100": 0.00992, "precision_at_1000": 0.001, "mrr_at_1": 0.8522267206477733, "mrr_at_3": 0.888663967611336, "mrr_at_5": 0.8919028340080972, "mrr_at_10": 0.89323227941649, "mrr_at_20": 0.8948389638926076, "mrr_at_100": 0.8952180496071648, "mrr_at_1000": 0.8952400720283006, "naucs_at_1_max": 0.6091042742691317, "naucs_at_1_std": 0.06828770581523343, "naucs_at_1_diff1": 0.9363834323697563, "naucs_at_3_max": 0.5528244624347816, "naucs_at_3_std": -0.03701021164354884, "naucs_at_3_diff1": 0.9011217865736105, "naucs_at_5_max": 0.8026139487593744, "naucs_at_5_std": 0.27628703636986895, "naucs_at_5_diff1": 0.911427543567563, "naucs_at_10_max": 0.8451953613222364, "naucs_at_10_std": 0.4649226732413682, "naucs_at_10_diff1": 0.9012598903100626, "naucs_at_20_max": 0.9178862612438203, "naucs_at_20_std": 0.689729386374777, "naucs_at_20_diff1": 0.927259949634811, "naucs_at_100_max": 0.9346992729676393, "naucs_at_100_std": 0.8559234324931341, "naucs_at_100_diff1": 0.9346992729676393, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "syntheticDocQA_energy_test": {"ndcg_at_1": 0.95, "ndcg_at_3": 0.96262, "ndcg_at_5": 0.96649, "ndcg_at_10": 0.96964, "ndcg_at_20": 0.96964, "ndcg_at_100": 0.97185, "ndcg_at_1000": 0.97185, "map_at_1": 0.95, "map_at_3": 0.96, "map_at_5": 0.962, "map_at_10": 0.96325, "map_at_20": 0.96325, "map_at_100": 0.9637, "map_at_1000": 0.9637, "recall_at_1": 0.95, "recall_at_3": 0.97, "recall_at_5": 0.98, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.95, "precision_at_3": 0.32333, "precision_at_5": 0.196, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.95, "mrr_at_3": 0.9583333333333333, "mrr_at_5": 0.9603333333333333, "mrr_at_10": 0.9620000000000001, "mrr_at_20": 0.9620000000000001, "mrr_at_100": 0.9624545454545455, "mrr_at_1000": 0.9624545454545455, "naucs_at_1_max": 0.5589169000933719, "naucs_at_1_std": -0.5752567693744151, "naucs_at_1_diff1": 0.9738562091503253, "naucs_at_3_max": 0.8692810457516398, "naucs_at_3_std": 0.20121381886088488, "naucs_at_3_diff1": 0.9564270152505466, "naucs_at_5_max": 0.8692810457516353, "naucs_at_5_std": 0.24042950513538955, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 0.8692810457516413, "naucs_at_10_std": 0.35807656395891135, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 0.8692810457516413, "naucs_at_20_std": 0.35807656395891135, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "tabfquad_test_subsampled": {"ndcg_at_1": 0.83929, "ndcg_at_3": 0.88605, "ndcg_at_5": 0.89666, "ndcg_at_10": 0.90146, "ndcg_at_20": 0.90988, "ndcg_at_100": 0.91198, "ndcg_at_1000": 0.91198, "map_at_1": 0.83929, "map_at_3": 0.875, "map_at_5": 0.88107, "map_at_10": 0.88315, "map_at_20": 0.88562, "map_at_100": 0.88594, "map_at_1000": 0.88594, "recall_at_1": 0.83929, "recall_at_3": 0.91786, "recall_at_5": 0.94286, "recall_at_10": 0.95714, "recall_at_20": 0.98929, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.83929, "precision_at_3": 0.30595, "precision_at_5": 0.18857, "precision_at_10": 0.09571, "precision_at_20": 0.04946, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.8321428571428572, "mrr_at_3": 0.8714285714285714, "mrr_at_5": 0.8775, "mrr_at_10": 0.8795833333333333, "mrr_at_20": 0.8820347540930248, "mrr_at_100": 0.8823579864162571, "mrr_at_1000": 0.8823579864162571, "naucs_at_1_max": 0.4780421259412858, "naucs_at_1_std": 0.21763614536723871, "naucs_at_1_diff1": 0.8584742988104332, "naucs_at_3_max": 0.5309341127755455, "naucs_at_3_std": 0.3166889944383569, "naucs_at_3_diff1": 0.7913774205334293, "naucs_at_5_max": 0.5380777310924367, "naucs_at_5_std": 0.37307422969187604, "naucs_at_5_diff1": 0.7886321195144746, "naucs_at_10_max": 0.5157563025210077, "naucs_at_10_std": 0.4667366946778735, "naucs_at_10_diff1": 0.7631107998755065, "naucs_at_20_max": 0.6640211640211727, "naucs_at_20_std": 0.7424525365701908, "naucs_at_20_diff1": 0.807812013694365, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}} |