shopping_mmlu_leaderboard / ShoppingMMLU.json
Yilun Jin
is there a bug?
0302c93
{
"time": "241031154353",
"results": {
"GPT-4o (0513, detail-high)": {
"META": {
"Method": [
"GPT-4o (0513, detail-high)",
"https://openai.com/index/hello-gpt-4o/"
],
"Parameters": "",
"Language Model": "",
"Vision Model": "",
"Org": "OpenAI",
"Time": "2024/05/31",
"Verified": "Yes",
"OpenSource": "No",
"key": 270,
"dir_name": "GPT4o_HIGH"
},
"Shopping Concept Understanding": {
"Rec": 67.8,
"Ocr": 76.8,
"Know": 58.3,
"Gen": 56.9,
"Spat": 74.3,
"Math": 76.2,
"Overall": 69.1,
"Overall (official)": "N/A"
},
"Shopping Knowledge Reasoning": {
"Overall": 61.3,
"SCI": 64.8,
"TQA": 70.3,
"NUM": 44.4,
"ARI": 58.4,
"VQA": 47.5,
"GEO": 61.5,
"ALG": 62.3,
"GPS": 60.1,
"MWP": 69.9,
"LOG": 43.2,
"FQA": 60.2,
"STA": 68.4
},
"User Behavior Alignment": {
"Text Recognition": 199,
"Scene Text-centric VQA": 181,
"Doc-oriented VQA": 168,
"Key Information Extraction": 170,
"Handwritten Mathematical Expression Recognition": 18,
"Overall": 736
},
"Multi-lingual Abilities": {
"Overall": 63.9,
"coarse perception": 73.6,
"fine-grained perception": 54.8,
"instance reasoning": 66.4,
"logical reasoning": 72.0,
"math": 66.4,
"science & technology": 50.0
}
}
}
}