gme-Qwen2-VL-2B-Instruct / results.json
zyznull's picture
Create results.json
e6da526 verified
{"arxivqa_test_subsampled": {"ndcg_at_1": 0.786, "ndcg_at_3": 0.82859, "ndcg_at_5": 0.83909, "ndcg_at_10": 0.85118, "ndcg_at_20": 0.85912, "ndcg_at_50": 0.86489, "ndcg_at_100": 0.8669, "map_at_1": 0.786, "map_at_3": 0.819, "map_at_5": 0.8247, "map_at_10": 0.82957, "map_at_20": 0.83167, "map_at_50": 0.83269, "map_at_100": 0.83289, "recall_at_1": 0.786, "recall_at_3": 0.856, "recall_at_5": 0.882, "recall_at_10": 0.92, "recall_at_20": 0.952, "recall_at_50": 0.98, "recall_at_100": 0.992, "precision_at_1": 0.786, "precision_at_3": 0.28533, "precision_at_5": 0.1764, "precision_at_10": 0.092, "precision_at_20": 0.0476, "precision_at_50": 0.0196, "precision_at_100": 0.00992, "mrr_at_1": 0.786, "mrr_at_3": 0.8189999999999998, "mrr_at_5": 0.8246999999999999, "mrr_at_10": 0.8295706349206347, "mrr_at_20": 0.8316712173447467, "mrr_at_50": 0.8326901700317237, "mrr_at_100": 0.832885280842064, "naucs_at_1_max": 0.8892284946618716, "naucs_at_1_std": -0.3267276061583954, "naucs_at_1_diff1": 0.9336110202863116, "naucs_at_3_max": 0.8940728333693538, "naucs_at_3_std": -0.2892397881996969, "naucs_at_3_diff1": 0.9035214501837059, "naucs_at_5_max": 0.8728866928256561, "naucs_at_5_std": -0.2504965363561503, "naucs_at_5_diff1": 0.8887031923654505, "naucs_at_10_max": 0.8634570494864612, "naucs_at_10_std": -0.32796451914099106, "naucs_at_10_diff1": 0.8821195144724552, "naucs_at_20_max": 0.8949579831932764, "naucs_at_20_std": -0.29034391534391896, "naucs_at_20_diff1": 0.878618113912229, "naucs_at_50_max": 0.9096638655462147, "naucs_at_50_std": 0.12301587301586674, "naucs_at_50_diff1": 0.9014939309056892, "naucs_at_100_max": 1.0, "naucs_at_100_std": 0.8558590102707644, "naucs_at_100_diff1": 0.9673202614378978}, "docvqa_test_subsampled": {"ndcg_at_1": 0.4612, "ndcg_at_3": 0.52679, "ndcg_at_5": 0.54568, "ndcg_at_10": 0.56932, "ndcg_at_20": 0.58751, "ndcg_at_50": 0.59746, "ndcg_at_100": 0.60597, "map_at_1": 0.4612, "map_at_3": 0.51109, "map_at_5": 0.5214, "map_at_10": 0.53156, "map_at_20": 0.53639, "map_at_50": 0.5381, "map_at_100": 0.53881, "recall_at_1": 0.4612, "recall_at_3": 0.57206, "recall_at_5": 0.61863, "recall_at_10": 0.68958, "recall_at_20": 0.76275, "recall_at_50": 0.81153, "recall_at_100": 0.86475, "precision_at_1": 0.4612, "precision_at_3": 0.19069, "precision_at_5": 0.12373, "precision_at_10": 0.06896, "precision_at_20": 0.03814, "precision_at_50": 0.01623, "precision_at_100": 0.00865, "mrr_at_1": 0.4611973392461197, "mrr_at_3": 0.5110864745011088, "mrr_at_5": 0.5213968957871397, "mrr_at_10": 0.531558617534227, "mrr_at_20": 0.5363705459439568, "mrr_at_50": 0.5380818653716594, "mrr_at_100": 0.5387956639409233, "naucs_at_1_max": 0.6018461520563698, "naucs_at_1_std": -0.29453860375419927, "naucs_at_1_diff1": 0.7645813967276972, "naucs_at_3_max": 0.58164206836642, "naucs_at_3_std": -0.25403366910903274, "naucs_at_3_diff1": 0.6515883726118482, "naucs_at_5_max": 0.5721975818175079, "naucs_at_5_std": -0.2744904256592273, "naucs_at_5_diff1": 0.61928231963369, "naucs_at_10_max": 0.5671552158378993, "naucs_at_10_std": -0.28750264247429225, "naucs_at_10_diff1": 0.5626707252181952, "naucs_at_20_max": 0.6156578041155646, "naucs_at_20_std": -0.2749165128258015, "naucs_at_20_diff1": 0.6002227825216901, "naucs_at_50_max": 0.6012704408386546, "naucs_at_50_std": -0.33739300418375234, "naucs_at_50_diff1": 0.6419776051914634, "naucs_at_100_max": 0.6412951544246476, "naucs_at_100_std": -0.2690460787063671, "naucs_at_100_diff1": 0.6375549090035819}, "infovqa_test_subsampled": {"ndcg_at_1": 0.8664, "ndcg_at_3": 0.89876, "ndcg_at_5": 0.91113, "ndcg_at_10": 0.91568, "ndcg_at_20": 0.91815, "ndcg_at_50": 0.92068, "ndcg_at_100": 0.92099, "map_at_1": 0.8664, "map_at_3": 0.89103, "map_at_5": 0.89781, "map_at_10": 0.89966, "map_at_20": 0.9003, "map_at_50": 0.90076, "map_at_100": 0.90078, "recall_at_1": 0.8664, "recall_at_3": 0.92105, "recall_at_5": 0.95142, "recall_at_10": 0.96559, "recall_at_20": 0.97571, "recall_at_50": 0.98785, "recall_at_100": 0.98988, "precision_at_1": 0.8664, "precision_at_3": 0.30702, "precision_at_5": 0.19028, "precision_at_10": 0.09656, "precision_at_20": 0.04879, "precision_at_50": 0.01976, "precision_at_100": 0.0099, "mrr_at_1": 0.8663967611336032, "mrr_at_3": 0.891025641025641, "mrr_at_5": 0.8978070175438595, "mrr_at_10": 0.8996626180836705, "mrr_at_20": 0.9003085288927154, "mrr_at_50": 0.9007665297125582, "mrr_at_100": 0.9007882962878053, "naucs_at_1_max": 0.6852647369506982, "naucs_at_1_std": -0.11019839462727245, "naucs_at_1_diff1": 0.9097315890589761, "naucs_at_3_max": 0.6980505621537562, "naucs_at_3_std": -0.1587891880170896, "naucs_at_3_diff1": 0.8829217951800641, "naucs_at_5_max": 0.857855262670585, "naucs_at_5_std": 0.07362769173568413, "naucs_at_5_diff1": 0.9054241580005612, "naucs_at_10_max": 0.8918421488449596, "naucs_at_10_std": 0.19317064064529682, "naucs_at_10_diff1": 0.9222031546896321, "naucs_at_20_max": 0.8948019000326467, "naucs_at_20_std": 0.19941878566952734, "naucs_at_20_diff1": 0.9238158184622186, "naucs_at_50_max": 0.9564661819783937, "naucs_at_50_std": 0.6394965227510232, "naucs_at_50_diff1": 0.9129323639568211, "naucs_at_100_max": 0.9477594183740937, "naucs_at_100_std": 0.5935161181141998, "naucs_at_100_diff1": 0.9216391275611305}, "shiftproject_test": {"ndcg_at_1": 0.88, "ndcg_at_3": 0.94286, "ndcg_at_5": 0.94286, "ndcg_at_10": 0.94601, "ndcg_at_20": 0.94601, "ndcg_at_50": 0.94601, "ndcg_at_100": 0.94601, "map_at_1": 0.88, "map_at_3": 0.92667, "map_at_5": 0.92667, "map_at_10": 0.92792, "map_at_20": 0.92792, "map_at_50": 0.92792, "map_at_100": 0.92792, "recall_at_1": 0.88, "recall_at_3": 0.99, "recall_at_5": 0.99, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.88, "precision_at_3": 0.33, "precision_at_5": 0.198, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.89, "mrr_at_3": 0.9316666666666668, "mrr_at_5": 0.9316666666666668, "mrr_at_10": 0.9329166666666667, "mrr_at_20": 0.9329166666666667, "mrr_at_50": 0.9329166666666667, "mrr_at_100": 0.9329166666666667, "naucs_at_1_max": 0.477529430480433, "naucs_at_1_std": -0.17256601972637695, "naucs_at_1_diff1": 0.8993795736557426, "naucs_at_3_max": 1.0, "naucs_at_3_std": 0.35807656395889226, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 0.35807656395891135, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null}, "syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.98, "ndcg_at_3": 0.98631, "ndcg_at_5": 0.99018, "ndcg_at_10": 0.99018, "ndcg_at_20": 0.99018, "ndcg_at_50": 0.99018, "ndcg_at_100": 0.99018, "map_at_1": 0.98, "map_at_3": 0.985, "map_at_5": 0.987, "map_at_10": 0.987, "map_at_20": 0.987, "map_at_50": 0.987, "map_at_100": 0.987, "recall_at_1": 0.98, "recall_at_3": 0.99, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.98, "precision_at_3": 0.33, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.98, "mrr_at_3": 0.985, "mrr_at_5": 0.987, "mrr_at_10": 0.987, "mrr_at_20": 0.987, "mrr_at_50": 0.987, "mrr_at_100": 0.987, "naucs_at_1_max": 1.0, "naucs_at_1_std": 0.8692810457516332, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 1.0, "naucs_at_3_std": 0.8692810457516356, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null}, "syntheticDocQA_energy_test": {"ndcg_at_1": 0.9, "ndcg_at_3": 0.92762, "ndcg_at_5": 0.93149, "ndcg_at_10": 0.93149, "ndcg_at_20": 0.93698, "ndcg_at_50": 0.9409, "ndcg_at_100": 0.9409, "map_at_1": 0.9, "map_at_3": 0.92, "map_at_5": 0.922, "map_at_10": 0.922, "map_at_20": 0.92374, "map_at_50": 0.92435, "map_at_100": 0.92435, "recall_at_1": 0.9, "recall_at_3": 0.95, "recall_at_5": 0.96, "recall_at_10": 0.96, "recall_at_20": 0.98, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.9, "precision_at_3": 0.31667, "precision_at_5": 0.192, "precision_at_10": 0.096, "precision_at_20": 0.049, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.9, "mrr_at_3": 0.92, "mrr_at_5": 0.9220000000000002, "mrr_at_10": 0.9220000000000002, "mrr_at_20": 0.9237424242424244, "mrr_at_50": 0.924346027846028, "mrr_at_100": 0.924346027846028, "naucs_at_1_max": 0.7248832866479916, "naucs_at_1_std": -0.5339402427637734, "naucs_at_1_diff1": 0.9477124183006546, "naucs_at_3_max": 0.5314659197012136, "naucs_at_3_std": -0.9360410830999026, "naucs_at_3_diff1": 0.9477124183006521, "naucs_at_5_max": 0.7071661998132599, "naucs_at_5_std": -0.7350606909430335, "naucs_at_5_diff1": 0.9346405228758139, "naucs_at_10_max": 0.7071661998132599, "naucs_at_10_std": -0.7350606909430335, "naucs_at_10_diff1": 0.9346405228758139, "naucs_at_20_max": 1.0, "naucs_at_20_std": -0.024276377217554025, "naucs_at_20_diff1": 0.9346405228758136, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null}, "syntheticDocQA_government_reports_test": {"ndcg_at_1": 0.96, "ndcg_at_3": 0.97893, "ndcg_at_5": 0.97893, "ndcg_at_10": 0.98194, "ndcg_at_20": 0.98194, "ndcg_at_50": 0.98194, "ndcg_at_100": 0.98194, "map_at_1": 0.96, "map_at_3": 0.975, "map_at_5": 0.975, "map_at_10": 0.97611, "map_at_20": 0.97611, "map_at_50": 0.97611, "map_at_100": 0.97611, "recall_at_1": 0.96, "recall_at_3": 0.99, "recall_at_5": 0.99, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.96, "precision_at_3": 0.33, "precision_at_5": 0.198, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.96, "mrr_at_3": 0.975, "mrr_at_5": 0.975, "mrr_at_10": 0.9761111111111112, "mrr_at_20": 0.9761111111111112, "mrr_at_50": 0.9761111111111112, "mrr_at_100": 0.9761111111111112, "naucs_at_1_max": 0.5997899159663848, "naucs_at_1_std": -1.2007469654528418, "naucs_at_1_diff1": 0.96732026143791, "naucs_at_3_max": 0.7222222222222157, "naucs_at_3_std": -1.7399626517274398, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 0.7222222222222276, "naucs_at_5_std": -1.7399626517273863, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null}, "syntheticDocQA_healthcare_industry_test": {"ndcg_at_1": 0.97, "ndcg_at_3": 0.98893, "ndcg_at_5": 0.98893, "ndcg_at_10": 0.98893, "ndcg_at_20": 0.98893, "ndcg_at_50": 0.98893, "ndcg_at_100": 0.98893, "map_at_1": 0.97, "map_at_3": 0.985, "map_at_5": 0.985, "map_at_10": 0.985, "map_at_20": 0.985, "map_at_50": 0.985, "map_at_100": 0.985, "recall_at_1": 0.97, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.97, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.97, "mrr_at_3": 0.985, "mrr_at_5": 0.985, "mrr_at_10": 0.985, "mrr_at_20": 0.985, "mrr_at_50": 0.985, "mrr_at_100": 0.985, "naucs_at_1_max": 0.8078120136943662, "naucs_at_1_std": 0.4733893557422995, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null}, "tabfquad_test_subsampled": {"ndcg_at_1": 0.91429, "ndcg_at_3": 0.9372, "ndcg_at_5": 0.94612, "ndcg_at_10": 0.94612, "ndcg_at_20": 0.94992, "ndcg_at_50": 0.9522, "ndcg_at_100": 0.9522, "map_at_1": 0.91429, "map_at_3": 0.93155, "map_at_5": 0.93655, "map_at_10": 0.93655, "map_at_20": 0.93769, "map_at_50": 0.93813, "map_at_100": 0.93813, "recall_at_1": 0.91429, "recall_at_3": 0.95357, "recall_at_5": 0.975, "recall_at_10": 0.975, "recall_at_20": 0.98929, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.91429, "precision_at_3": 0.31786, "precision_at_5": 0.195, "precision_at_10": 0.0975, "precision_at_20": 0.04946, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.9142857142857143, "mrr_at_3": 0.9315476190476192, "mrr_at_5": 0.9365476190476191, "mrr_at_10": 0.9365476190476191, "mrr_at_20": 0.9376930014430014, "mrr_at_50": 0.9381267131585722, "mrr_at_100": 0.9381267131585722, "naucs_at_1_max": 0.8191332088390891, "naucs_at_1_std": 0.4374610955493308, "naucs_at_1_diff1": 0.8954442888266413, "naucs_at_3_max": 0.9484665661136237, "naucs_at_3_std": 0.7109818286288868, "naucs_at_3_diff1": 0.9484665661136237, "naucs_at_5_max": 0.9416433239962654, "naucs_at_5_std": 0.8163265306122475, "naucs_at_5_diff1": 0.9439775910364095, "naucs_at_10_max": 0.9416433239962654, "naucs_at_10_std": 0.8163265306122475, "naucs_at_10_diff1": 0.9439775910364095, "naucs_at_20_max": 0.9564270152505505, "naucs_at_20_std": 0.8638344226579515, "naucs_at_20_diff1": 0.9564270152505505, "naucs_at_50_max": 1.0, "naucs_at_50_std": 1.0, "naucs_at_50_diff1": 1.0, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0}, "tatdqa_test": {"ndcg_at_1": 0.57594, "ndcg_at_3": 0.68196, "ndcg_at_5": 0.71051, "ndcg_at_10": 0.73262, "ndcg_at_20": 0.74285, "ndcg_at_50": 0.75016, "ndcg_at_100": 0.75232, "map_at_1": 0.57594, "map_at_3": 0.65583, "map_at_5": 0.67169, "map_at_10": 0.68088, "map_at_20": 0.68373, "map_at_50": 0.68489, "map_at_100": 0.68507, "recall_at_1": 0.57594, "recall_at_3": 0.75759, "recall_at_5": 0.82685, "recall_at_10": 0.8949, "recall_at_20": 0.93499, "recall_at_50": 0.97205, "recall_at_100": 0.98542, "precision_at_1": 0.57594, "precision_at_3": 0.25253, "precision_at_5": 0.16537, "precision_at_10": 0.08949, "precision_at_20": 0.04675, "precision_at_50": 0.01944, "precision_at_100": 0.00985, "mrr_at_1": 0.5765492102065614, "mrr_at_3": 0.6560348319157563, "mrr_at_5": 0.6721952207371417, "mrr_at_10": 0.6813224459488133, "mrr_at_20": 0.6841812145327757, "mrr_at_50": 0.6853427671085895, "mrr_at_100": 0.6855309554803338, "naucs_at_1_max": 0.5301694754533977, "naucs_at_1_std": -0.1619263455379304, "naucs_at_1_diff1": 0.7278065996239447, "naucs_at_3_max": 0.5908076044993518, "naucs_at_3_std": -0.1323722551834884, "naucs_at_3_diff1": 0.6135897839866084, "naucs_at_5_max": 0.5964907222680631, "naucs_at_5_std": -0.12904588858296043, "naucs_at_5_diff1": 0.5731447174171511, "naucs_at_10_max": 0.6224287679728321, "naucs_at_10_std": 0.0010190169826118439, "naucs_at_10_diff1": 0.5292659611089736, "naucs_at_20_max": 0.7484085475471819, "naucs_at_20_std": 0.2475268741958173, "naucs_at_20_diff1": 0.5572740928407118, "naucs_at_50_max": 0.7916713620973378, "naucs_at_50_std": 0.35271506013966625, "naucs_at_50_diff1": 0.6134648860897702, "naucs_at_100_max": 0.83478407909221, "naucs_at_100_std": 0.6562392206248636, "naucs_at_100_diff1": 0.594171346126163}}