{ "syntheticDocQA_artificial_intelligence_test": { "ndcg_at_1": 0.96, "ndcg_at_3": 0.98524, "ndcg_at_5": 0.98524, "ndcg_at_10": 0.98524, "ndcg_at_20": 0.98524, "ndcg_at_50": 0.98524, "ndcg_at_100": 0.98524, "map_at_1": 0.96, "map_at_3": 0.98, "map_at_5": 0.98, "map_at_10": 0.98, "map_at_20": 0.98, "map_at_50": 0.98, "map_at_100": 0.98, "recall_at_1": 0.96, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.96, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.96, "mrr_at_3": 0.98, "mrr_at_5": 0.98, "mrr_at_10": 0.98, "mrr_at_20": 0.98, "mrr_at_50": 0.98, "mrr_at_100": 0.98, "naucs_at_1_max": 0.06302521008403594, "naucs_at_1_std": -0.04843604108309577, "naucs_at_1_diff1": 0.9305555555555578, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "tabfquad_test_subsampled": { "ndcg_at_1": 0.76429, "ndcg_at_3": 0.81548, "ndcg_at_5": 0.83054, "ndcg_at_10": 0.84162, "ndcg_at_20": 0.85069, "ndcg_at_50": 0.85663, "ndcg_at_100": 0.85846, "map_at_1": 0.76429, "map_at_3": 0.80238, "map_at_5": 0.81095, "map_at_10": 0.81526, "map_at_20": 0.81777, "map_at_50": 0.81884, "map_at_100": 0.81903, "recall_at_1": 0.76429, "recall_at_3": 0.85357, "recall_at_5": 0.88929, "recall_at_10": 0.925, "recall_at_20": 0.96071, "recall_at_50": 0.98929, "recall_at_100": 1.0, "precision_at_1": 0.76429, "precision_at_3": 0.28452, "precision_at_5": 0.17786, "precision_at_10": 0.0925, "precision_at_20": 0.04804, "precision_at_50": 0.01979, "precision_at_100": 0.01, "mrr_at_1": 0.7642857142857142, "mrr_at_3": 0.8047619047619048, "mrr_at_5": 0.812261904761905, "mrr_at_10": 0.8157568027210885, "mrr_at_20": 0.8182429277072134, "mrr_at_50": 0.8196521812519686, "mrr_at_100": 0.819844389714301, "naucs_at_1_max": 0.3930648524086476, "naucs_at_1_std": 0.13014375068583456, "naucs_at_1_diff1": 0.8538351805113578, "naucs_at_3_max": 0.3984224656102249, "naucs_at_3_std": 0.1907296096552706, "naucs_at_3_diff1": 0.7447435318714221, "naucs_at_5_max": 0.3885787498856053, "naucs_at_5_std": 0.16155699948140706, "naucs_at_5_diff1": 0.795170983191481, "naucs_at_10_max": 0.3904450669156529, "naucs_at_10_std": 0.15479525143390607, "naucs_at_10_diff1": 0.7791561068871982, "naucs_at_20_max": 0.28736949325184535, "naucs_at_20_std": -0.07932263814616634, "naucs_at_20_diff1": 0.7797725150666338, "naucs_at_50_max": 0.34267040149392364, "naucs_at_50_std": -0.1545284780578993, "naucs_at_50_diff1": 0.8078120136943696, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0 }, "shiftproject_test": { "ndcg_at_1": 0.53, "ndcg_at_3": 0.6444, "ndcg_at_5": 0.68185, "ndcg_at_10": 0.70901, "ndcg_at_20": 0.71713, "ndcg_at_50": 0.72325, "ndcg_at_100": 0.7248, "map_at_1": 0.53, "map_at_3": 0.615, "map_at_5": 0.636, "map_at_10": 0.64798, "map_at_20": 0.6505, "map_at_50": 0.65154, "map_at_100": 0.65165, "recall_at_1": 0.53, "recall_at_3": 0.73, "recall_at_5": 0.82, "recall_at_10": 0.9, "recall_at_20": 0.93, "recall_at_50": 0.96, "recall_at_100": 0.97, "precision_at_1": 0.53, "precision_at_3": 0.24333, "precision_at_5": 0.164, "precision_at_10": 0.09, "precision_at_20": 0.0465, "precision_at_50": 0.0192, "precision_at_100": 0.0097, "mrr_at_1": 0.55, "mrr_at_3": 0.6300000000000001, "mrr_at_5": 0.655, "mrr_at_10": 0.6632341269841271, "mrr_at_20": 0.6648575036075036, "mrr_at_50": 0.6659570406445406, "mrr_at_100": 0.6660746877033642, "naucs_at_1_max": -0.058021416101939505, "naucs_at_1_std": -0.29796973154270173, "naucs_at_1_diff1": 0.632737219755598, "naucs_at_3_max": 0.0032239674457922728, "naucs_at_3_std": -0.39310778666771473, "naucs_at_3_diff1": 0.5492736244077923, "naucs_at_5_max": 0.18798876528252045, "naucs_at_5_std": -0.11273267981054919, "naucs_at_5_diff1": 0.6309890957153872, "naucs_at_10_max": 0.11223155929038518, "naucs_at_10_std": -0.22661064425770017, "naucs_at_10_diff1": 0.565966386554623, "naucs_at_20_max": -0.13685474189675803, "naucs_at_20_std": -0.5446178471388509, "naucs_at_20_diff1": 0.6702681072429001, "naucs_at_50_max": -0.27065826330532117, "naucs_at_50_std": -0.5157563025210059, "naucs_at_50_diff1": 0.6458916900093365, "naucs_at_100_max": -0.30376595082477015, "naucs_at_100_std": -0.6305633364456832, "naucs_at_100_diff1": 0.5714285714285773 }, "syntheticDocQA_government_reports_test": { "ndcg_at_1": 0.9, "ndcg_at_3": 0.93524, "ndcg_at_5": 0.95203, "ndcg_at_10": 0.95203, "ndcg_at_20": 0.95203, "ndcg_at_50": 0.95203, "ndcg_at_100": 0.95203, "map_at_1": 0.9, "map_at_3": 0.92667, "map_at_5": 0.93617, "map_at_10": 0.93617, "map_at_20": 0.93617, "map_at_50": 0.93617, "map_at_100": 0.93617, "recall_at_1": 0.9, "recall_at_3": 0.96, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.9, "precision_at_3": 0.32, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.9, "mrr_at_3": 0.9333333333333332, "mrr_at_5": 0.9403333333333334, "mrr_at_10": 0.9403333333333334, "mrr_at_20": 0.9403333333333334, "mrr_at_50": 0.9403333333333334, "mrr_at_100": 0.9403333333333334, "naucs_at_1_max": 0.1526143790849667, "naucs_at_1_std": -0.28823529411764676, "naucs_at_1_diff1": 0.8867880485527548, "naucs_at_3_max": 0.07119514472455671, "naucs_at_3_std": -0.29236694677871167, "naucs_at_3_diff1": 0.8190943043884199, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "infovqa_test_subsampled": { "ndcg_at_1": 0.76316, "ndcg_at_3": 0.82921, "ndcg_at_5": 0.84289, "ndcg_at_10": 0.85188, "ndcg_at_20": 0.85644, "ndcg_at_50": 0.86119, "ndcg_at_100": 0.86283, "map_at_1": 0.76316, "map_at_3": 0.8141, "map_at_5": 0.8219, "map_at_10": 0.8255, "map_at_20": 0.82673, "map_at_50": 0.82746, "map_at_100": 0.8276, "recall_at_1": 0.76316, "recall_at_3": 0.87247, "recall_at_5": 0.90486, "recall_at_10": 0.9332, "recall_at_20": 0.95142, "recall_at_50": 0.97571, "recall_at_100": 0.98583, "precision_at_1": 0.76316, "precision_at_3": 0.29082, "precision_at_5": 0.18097, "precision_at_10": 0.09332, "precision_at_20": 0.04757, "precision_at_50": 0.01951, "precision_at_100": 0.00986, "mrr_at_1": 0.7611336032388664, "mrr_at_3": 0.813090418353576, "mrr_at_5": 0.8216936572199727, "mrr_at_10": 0.8248160465265728, "mrr_at_20": 0.825886692856507, "mrr_at_50": 0.8265042447979247, "mrr_at_100": 0.8266668975545796, "naucs_at_1_max": 0.5439433197090425, "naucs_at_1_std": 0.24776250817754733, "naucs_at_1_diff1": 0.8926114371628262, "naucs_at_3_max": 0.5680546695594344, "naucs_at_3_std": 0.2541748591933084, "naucs_at_3_diff1": 0.853534819493221, "naucs_at_5_max": 0.705693682065501, "naucs_at_5_std": 0.4124626625681076, "naucs_at_5_diff1": 0.8406271757500486, "naucs_at_10_max": 0.7825666550711661, "naucs_at_10_std": 0.5614849682029542, "naucs_at_10_diff1": 0.842869767889761, "naucs_at_20_max": 0.7543170269410486, "naucs_at_20_std": 0.5443151217010491, "naucs_at_20_diff1": 0.8462829819306663, "naucs_at_50_max": 0.8236318160505198, "naucs_at_50_std": 0.7478265745755217, "naucs_at_50_diff1": 0.8852960979962946, "naucs_at_100_max": 0.7986413595283582, "naucs_at_100_std": 0.6710512069467076, "naucs_at_100_diff1": 0.8616984811111583 }, "syntheticDocQA_healthcare_industry_test": { "ndcg_at_1": 0.92, "ndcg_at_3": 0.95024, "ndcg_at_5": 0.95885, "ndcg_at_10": 0.95885, "ndcg_at_20": 0.95885, "ndcg_at_50": 0.96077, "ndcg_at_100": 0.96077, "map_at_1": 0.92, "map_at_3": 0.94333, "map_at_5": 0.94833, "map_at_10": 0.94833, "map_at_20": 0.94833, "map_at_50": 0.94861, "map_at_100": 0.94861, "recall_at_1": 0.92, "recall_at_3": 0.97, "recall_at_5": 0.99, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.92, "precision_at_3": 0.32333, "precision_at_5": 0.198, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.95, "mrr_at_3": 0.9633333333333333, "mrr_at_5": 0.9658333333333333, "mrr_at_10": 0.9658333333333333, "mrr_at_20": 0.9658333333333333, "mrr_at_50": 0.96625, "mrr_at_100": 0.96625, "naucs_at_1_max": 0.6046918767507006, "naucs_at_1_std": 0.07207049486461263, "naucs_at_1_diff1": 0.9279295051353874, "naucs_at_3_max": 0.460939931528168, "naucs_at_3_std": 0.08839091192032704, "naucs_at_3_diff1": 0.8513849984438244, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 0.5541549953314738, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 0.5541549953314738, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 0.5541549953314738, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "syntheticDocQA_energy_test": { "ndcg_at_1": 0.91, "ndcg_at_3": 0.94786, "ndcg_at_5": 0.94786, "ndcg_at_10": 0.95142, "ndcg_at_20": 0.95142, "ndcg_at_50": 0.9536, "ndcg_at_100": 0.95525, "map_at_1": 0.91, "map_at_3": 0.94, "map_at_5": 0.94, "map_at_10": 0.94167, "map_at_20": 0.94167, "map_at_50": 0.9421, "map_at_100": 0.94226, "recall_at_1": 0.91, "recall_at_3": 0.97, "recall_at_5": 0.97, "recall_at_10": 0.98, "recall_at_20": 0.98, "recall_at_50": 0.99, "recall_at_100": 1.0, "precision_at_1": 0.91, "precision_at_3": 0.32333, "precision_at_5": 0.194, "precision_at_10": 0.098, "precision_at_20": 0.049, "precision_at_50": 0.0198, "precision_at_100": 0.01, "mrr_at_1": 0.95, "mrr_at_3": 0.96, "mrr_at_5": 0.9625, "mrr_at_10": 0.9625, "mrr_at_20": 0.9631666666666666, "mrr_at_50": 0.9631666666666666, "mrr_at_100": 0.9633279569892474, "naucs_at_1_max": 0.6794791990870418, "naucs_at_1_std": -0.20370370370370447, "naucs_at_1_diff1": 0.9854756717501807, "naucs_at_3_max": 0.807812013694371, "naucs_at_3_std": -0.45238095238094883, "naucs_at_3_diff1": 0.9564270152505466, "naucs_at_5_max": 0.807812013694364, "naucs_at_5_std": -0.45238095238095277, "naucs_at_5_diff1": 0.9564270152505424, "naucs_at_10_max": 0.9346405228758136, "naucs_at_10_std": 0.1914098972922579, "naucs_at_10_diff1": 0.9346405228758136, "naucs_at_20_max": 0.9346405228758136, "naucs_at_20_std": 0.1914098972922579, "naucs_at_20_diff1": 0.9346405228758136, "naucs_at_50_max": 1.0, "naucs_at_50_std": 0.554154995331464, "naucs_at_50_diff1": 1.0, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "arxivqa_test_subsampled": { "ndcg_at_1": 0.822, "ndcg_at_3": 0.86124, "ndcg_at_5": 0.87217, "ndcg_at_10": 0.87958, "ndcg_at_20": 0.88759, "ndcg_at_50": 0.8913, "ndcg_at_100": 0.89258, "map_at_1": 0.822, "map_at_3": 0.85133, "map_at_5": 0.85753, "map_at_10": 0.86076, "map_at_20": 0.86318, "map_at_50": 0.86383, "map_at_100": 0.86394, "recall_at_1": 0.822, "recall_at_3": 0.89, "recall_at_5": 0.916, "recall_at_10": 0.938, "recall_at_20": 0.968, "recall_at_50": 0.986, "recall_at_100": 0.994, "precision_at_1": 0.822, "precision_at_3": 0.29667, "precision_at_5": 0.1832, "precision_at_10": 0.0938, "precision_at_20": 0.0484, "precision_at_50": 0.01972, "precision_at_100": 0.00994, "mrr_at_1": 0.81, "mrr_at_3": 0.8446666666666667, "mrr_at_5": 0.8526666666666665, "mrr_at_10": 0.8558063492063492, "mrr_at_20": 0.8576622827722674, "mrr_at_50": 0.8582444004699996, "mrr_at_100": 0.858351807213441, "naucs_at_1_max": 0.6618424420480973, "naucs_at_1_std": 0.1726204386872768, "naucs_at_1_diff1": 0.8807854527648874, "naucs_at_3_max": 0.6544884460097947, "naucs_at_3_std": 0.2072674168885846, "naucs_at_3_diff1": 0.8465338029378915, "naucs_at_5_max": 0.6638877773331578, "naucs_at_5_std": 0.23538304210572925, "naucs_at_5_diff1": 0.8338224178560337, "naucs_at_10_max": 0.6184000481913182, "naucs_at_10_std": 0.1655371826149804, "naucs_at_10_diff1": 0.8101713803801093, "naucs_at_20_max": 0.7978232959850622, "naucs_at_20_std": 0.47426470588234737, "naucs_at_20_diff1": 0.8186566293183944, "naucs_at_50_max": 0.9019607843137241, "naucs_at_50_std": 0.7225556889422341, "naucs_at_50_diff1": 0.8832866479925231, "naucs_at_100_max": 1.0, "naucs_at_100_std": 0.8078120136943184, "naucs_at_100_diff1": 0.8202614379084721 }, "tatdqa_test": { "ndcg_at_1": 0.60571, "ndcg_at_3": 0.70518, "ndcg_at_5": 0.73148, "ndcg_at_10": 0.75212, "ndcg_at_20": 0.76107, "ndcg_at_50": 0.76775, "ndcg_at_100": 0.7707, "map_at_1": 0.60571, "map_at_3": 0.68094, "map_at_5": 0.69555, "map_at_10": 0.70408, "map_at_20": 0.70656, "map_at_50": 0.7076, "map_at_100": 0.70786, "recall_at_1": 0.60571, "recall_at_3": 0.77521, "recall_at_5": 0.839, "recall_at_10": 0.90279, "recall_at_20": 0.93803, "recall_at_50": 0.97205, "recall_at_100": 0.99028, "precision_at_1": 0.60571, "precision_at_3": 0.2584, "precision_at_5": 0.1678, "precision_at_10": 0.09028, "precision_at_20": 0.0469, "precision_at_50": 0.01944, "precision_at_100": 0.0099, "mrr_at_1": 0.6057108140947752, "mrr_at_3": 0.6830700688537875, "mrr_at_5": 0.6966788173349543, "mrr_at_10": 0.7058545005689602, "mrr_at_20": 0.707882381086114, "mrr_at_50": 0.7091241073972927, "mrr_at_100": 0.7093644660054671, "naucs_at_1_max": 0.3183541397363705, "naucs_at_1_std": 0.09826700774797242, "naucs_at_1_diff1": 0.7187936758795999, "naucs_at_3_max": 0.37504605152871257, "naucs_at_3_std": 0.14677854908146837, "naucs_at_3_diff1": 0.6383523296619431, "naucs_at_5_max": 0.42709986452798276, "naucs_at_5_std": 0.22990979779837553, "naucs_at_5_diff1": 0.6357454223598389, "naucs_at_10_max": 0.48845166873972423, "naucs_at_10_std": 0.33255307682895896, "naucs_at_10_diff1": 0.6184766146789907, "naucs_at_20_max": 0.45722581804053714, "naucs_at_20_std": 0.3272040057998003, "naucs_at_20_diff1": 0.6092689407956671, "naucs_at_50_max": 0.4613805353039302, "naucs_at_50_std": 0.35387081782577695, "naucs_at_50_diff1": 0.5797820548654825, "naucs_at_100_max": 0.5928666738326348, "naucs_at_100_std": 0.40800919961891313, "naucs_at_100_diff1": 0.5890653536441407 }, "docvqa_test_subsampled": { "ndcg_at_1": 0.47672, "ndcg_at_3": 0.55229, "ndcg_at_5": 0.56851, "ndcg_at_10": 0.58892, "ndcg_at_20": 0.60374, "ndcg_at_50": 0.6134, "ndcg_at_100": 0.62175, "map_at_1": 0.47672, "map_at_3": 0.53326, "map_at_5": 0.54213, "map_at_10": 0.55075, "map_at_20": 0.55494, "map_at_50": 0.55648, "map_at_100": 0.55724, "recall_at_1": 0.47672, "recall_at_3": 0.60754, "recall_at_5": 0.64745, "recall_at_10": 0.70953, "recall_at_20": 0.76718, "recall_at_50": 0.81596, "recall_at_100": 0.86696, "precision_at_1": 0.47672, "precision_at_3": 0.20251, "precision_at_5": 0.12949, "precision_at_10": 0.07095, "precision_at_20": 0.03836, "precision_at_50": 0.01632, "precision_at_100": 0.00867, "mrr_at_1": 0.48337028824833705, "mrr_at_3": 0.5354767184035477, "mrr_at_5": 0.5487804878048779, "mrr_at_10": 0.5557869637137927, "mrr_at_20": 0.5593928559116855, "mrr_at_50": 0.5615025624726138, "mrr_at_100": 0.5621791399632005, "naucs_at_1_max": 0.3785349172346823, "naucs_at_1_std": 0.6557287667252649, "naucs_at_1_diff1": 0.8526440408297176, "naucs_at_3_max": 0.28374137300417, "naucs_at_3_std": 0.7269399252140738, "naucs_at_3_diff1": 0.7922662049837966, "naucs_at_5_max": 0.23868786072943135, "naucs_at_5_std": 0.7281901708135905, "naucs_at_5_diff1": 0.7793870480543929, "naucs_at_10_max": 0.1650957871644876, "naucs_at_10_std": 0.7712043441704308, "naucs_at_10_diff1": 0.7842842146130585, "naucs_at_20_max": 0.0797800670615453, "naucs_at_20_std": 0.8135232985740711, "naucs_at_20_diff1": 0.7706236528786001, "naucs_at_50_max": 0.07226615905116034, "naucs_at_50_std": 0.8406489117674839, "naucs_at_50_diff1": 0.7290414525233613, "naucs_at_100_max": -0.023763600742901323, "naucs_at_100_std": 0.8665455621668288, "naucs_at_100_diff1": 0.6958809819906159 } }