Spaces:

AIM-Harvard
/

rabbits-leaderboard

Runtime error

App Files Files Community

magilogi committed on Jun 12, 2024

Commit

be36629

1 Parent(s): 49b4a14

add api-results

Browse files

Files changed (6) hide show

data/api-results/__pycache__/api_results.cpython-311.pyc +0 -0
data/api-results/api_results.py +32 -0
data/csv/models_data.csv +7 -3
src/__pycache__/models_info.cpython-311.pyc +0 -0
src/json2df.py +32 -1
src/models_info.py +16 -0

data/api-results/__pycache__/api_results.cpython-311.pyc ADDED Viewed

Binary file (647 Bytes). View file

data/api-results/api_results.py ADDED Viewed

	@@ -0,0 +1,32 @@

+# Corrected and cleaned evaluation scores for the API models, stored as fractions (0-1) keyed by benchmark name
+gpt4 = {
+    'b4bqa': 0.94921875,
+    'medqa_og': 0.9232804232804233,
+    'medqa_g2b': 0.8994708994708994,
+    'medmcqa_og': 0.9166666666666666,
+    'medmcqa_g2b': 0.8879310344827587
+}
+gpt4o = {
+    'b4bqa': 0.96484375,
+    'medqa_og': 0.9021164021164021,
+    'medqa_g2b': 0.8835978835978836,
+    'medmcqa_og': 0.9051724137931034,
+    'medmcqa_g2b': 0.8649425287356322
+}
+gpt35turbo = {
+    'b4bqa': 0.9174107142857143,
+    'medmcqa_og': 0.9827586206896551,
+    'medmcqa_g2b': 0.9770114942528736,
+    'medqa_og': 0.9629629629629629,
+    'medqa_g2b': 0.9603174603174603
+}
+claude_opus = {
+    'b4bqa': 0.921875,
+    'medqa_og': 0.8571428571428571,
+    'medqa_g2b': 0.8333333333333334,
+    'medmcqa_og': 0.8649425287356322,
+    'medmcqa_g2b': 0.7988505747126436
+}

data/csv/models_data.csv CHANGED Viewed

@@ -12,9 +12,13 @@ T,Model,b4bqa,b4b,medmcqa_g2b,medmcqa_orig_filtered,medmcqa_diff,medqa_4options_
 🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-1</a>",19.64,21.18,24.14,25.86,-1.72,21.69,20.9,0.79
 🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-2</a>",47.49,44.79,37.64,42.24,-4.6,41.8,43.92,-2.12
 💬,"<a target=""_blank"" href=""https://huggingface.co/microsoft/Phi-3-medium-4k-instruct"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-Phi-3-medium-4k-instruct</a>",69.98,65.94,60.34,72.41,-12.07,53.44,58.47,-5.03
-🟩,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mistral-7B-v0.3"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mistral-7B-v0.3</a>",70.31,61.99,48.28,56.9,-8.62,48.68,53.17,-4.49
-🟩,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mixtral-8x22B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mixtral-8x22B-v0.1</a>",87.72,78.82,61.78,70.4,-8.62,67.46,71.43,-3.97
-🟩,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mixtral-8x7B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mixtral-8x7B-v0.1</a>",86.1,74.75,55.46,64.94,-9.48,60.05,62.43,-2.38
 🔶,"<a target=""_blank"" href=""https://huggingface.co/ProbeMedicalYonseiMAILab/medllama3-v20"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">ProbeMedicalYonseiMAILab-medllama3-v20</a>",71.93,74.75,65.23,80.17,-14.94,76.46,90.21,-13.75
 🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-72B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-72B</a>",91.02,83.72,71.55,77.87,-6.32,74.07,75.4,-1.33
 🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-7B</a>",80.41,70.28,55.17,63.51,-8.34,53.7,58.99,-5.29

 🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-1</a>",19.64,21.18,24.14,25.86,-1.72,21.69,20.9,0.79
 🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-2</a>",47.49,44.79,37.64,42.24,-4.6,41.8,43.92,-2.12
 💬,"<a target=""_blank"" href=""https://huggingface.co/microsoft/Phi-3-medium-4k-instruct"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-Phi-3-medium-4k-instruct</a>",69.98,65.94,60.34,72.41,-12.07,53.44,58.47,-5.03
+🟢,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mistral-7B-v0.3"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mistral-7B-v0.3</a>",70.31,61.99,48.28,56.9,-8.62,48.68,53.17,-4.49
+🟢,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mixtral-8x22B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mixtral-8x22B-v0.1</a>",87.72,78.82,61.78,70.4,-8.62,67.46,71.43,-3.97
+🟢,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mixtral-8x7B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mixtral-8x7B-v0.1</a>",86.1,74.75,55.46,64.94,-9.48,60.05,62.43,-2.38
 🔶,"<a target=""_blank"" href=""https://huggingface.co/ProbeMedicalYonseiMAILab/medllama3-v20"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">ProbeMedicalYonseiMAILab-medllama3-v20</a>",71.93,74.75,65.23,80.17,-14.94,76.46,90.21,-13.75
 🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-72B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-72B</a>",91.02,83.72,71.55,77.87,-6.32,74.07,75.4,-1.33
 🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-7B</a>",80.41,70.28,55.17,63.51,-8.34,53.7,58.99,-5.29
+💬,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B-v2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-4</a>",94.92,,88.79,91.67,-2.88,89.95,92.33,-2.38
+💬,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B-v2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-4o</a>",96.48,,86.49,90.52,-4.03,88.36,90.21,-1.85
+💬,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B-v2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-3.5 Turbo</a>",91.74,,97.7,98.28,-0.58,96.03,96.3,-0.27
+💬,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B-v2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Claude Opus</a>",92.19,,79.89,86.49,-6.6,83.33,85.71,-2.38

src/__pycache__/models_info.cpython-311.pyc CHANGED Viewed

Binary files a/src/__pycache__/models_info.cpython-311.pyc and b/src/__pycache__/models_info.cpython-311.pyc differ

src/json2df.py CHANGED Viewed

@@ -1,14 +1,23 @@
 import os
 import json
 import pandas as pd
 from models_info import model_info
 directory = 'data/raw-eval-outputs'
 data = []
 def model_hyperlink(link, model_name):
     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
 def make_clickable_names(df):
     df["Model"] = df.apply(
         lambda row: model_hyperlink(row["Link"], row["Model"]), axis=1
@@ -35,11 +44,33 @@ for filename in os.listdir(directory):
             data.append(row)
 df = pd.DataFrame(data)
 df = make_clickable_names(df)
 df.drop(columns=["Link"], inplace=True)
 df['medmcqa_diff'] = (df['medmcqa_g2b'] - df['medmcqa_orig_filtered']).round(2)
 df['medqa_diff'] = (df['medqa_4options_g2b'] - df['medqa_4options_orig_filtered']).round(2)
@@ -60,7 +91,7 @@ cols = [
 ]]
 df = df[cols]
 output_csv = 'data/csv/models_data.csv'
 df.to_csv(output_csv, index=False)

 import os
 import json
 import pandas as pd
+import sys
+# Add the data/api-results directory to sys.path so that api_results.py can be imported
+sys.path.append(os.path.abspath('data/api-results'))
+# Now import the API results
+from api_results import gpt4, gpt4o, gpt35turbo, claude_opus
 from models_info import model_info
 directory = 'data/raw-eval-outputs'
 data = []
+# Function to create a clickable hyperlink for the model name
 def model_hyperlink(link, model_name):
     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
+# Function to apply the hyperlink creation function to the DataFrame
 def make_clickable_names(df):
     df["Model"] = df.apply(
         lambda row: model_hyperlink(row["Link"], row["Model"]), axis=1
             data.append(row)
+# Prepare the API results for integration
+api_models = {
+    'GPT-4': gpt4,
+    'GPT-4o': gpt4o,
+    'GPT-3.5 Turbo': gpt35turbo,
+    'Claude Opus': claude_opus
+}
+for model_name, results in api_models.items():
+    row = {
+        'Model': model_name,
+        'b4bqa': round(results.get('b4bqa', 0) * 100, 2),
+        'medmcqa_g2b': round(results['medmcqa_g2b'] * 100, 2),
+        'medmcqa_orig_filtered': round(results['medmcqa_og'] * 100, 2),
+        'medqa_4options_g2b': round(results['medqa_g2b'] * 100, 2),
+        'medqa_4options_orig_filtered': round(results['medqa_og'] * 100, 2),
+        'T': model_info[model_name]['tuning'],
+        'Link': model_info[model_name]['link']
+    }
+    data.append(row)
+# Create DataFrame from the collected data
 df = pd.DataFrame(data)
 df = make_clickable_names(df)
 df.drop(columns=["Link"], inplace=True)
+# Calculate the g2b-minus-original score difference for MedMCQA and MedQA, rounded to 2 decimals
 df['medmcqa_diff'] = (df['medmcqa_g2b'] - df['medmcqa_orig_filtered']).round(2)
 df['medqa_diff'] = (df['medqa_4options_g2b'] - df['medqa_4options_orig_filtered']).round(2)
 ]]
 df = df[cols]
+# Save DataFrame to CSV
 output_csv = 'data/csv/models_data.csv'
 df.to_csv(output_csv, index=False)

src/models_info.py CHANGED Viewed

@@ -76,4 +76,20 @@ model_info = {
         "link": "https://huggingface.co/Qwen/Qwen2-7B",
         "tuning": "🟢"  # Pre-trained
     },
 }

         "link": "https://huggingface.co/Qwen/Qwen2-7B",
         "tuning": "🟢"  # Pre-trained
     },
+    "GPT-4": {
+        "link": "https://huggingface.co/Qwen/Qwen2-7B-v2",
+        "tuning": "💬"
+    },
+    "GPT-4o": {
+        "link": "https://huggingface.co/Qwen/Qwen2-7B-v2",
+        "tuning": "💬"
+    },
+    "GPT-3.5 Turbo": {
+        "link": "https://huggingface.co/Qwen/Qwen2-7B-v2",
+        "tuning": "💬"
+    },
+    "Claude Opus": {
+        "link": "https://huggingface.co/Qwen/Qwen2-7B-v2",
+        "tuning": "💬"
+    }
 }