Spaces:
Runtime error
Runtime error
magilogi
committed on
Commit
·
be36629
1
Parent(s):
49b4a14
add api-results
Browse files
data/api-results/__pycache__/api_results.cpython-311.pyc
ADDED
Binary file (647 Bytes). View file
|
|
data/api-results/api_results.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Corrected and cleaned data
#
# Benchmark scores for models evaluated through hosted APIs. Each dictionary
# maps a benchmark key to a score stored as a fraction (every value below lies
# between 0 and 1); src/json2df.py multiplies these by 100 and rounds to two
# decimals before writing them to the leaderboard CSV.
#
# Keys (identical, and in the same order, for every model):
#   b4bqa        -> CSV column "b4bqa"
#   medqa_og     -> CSV column "medqa_4options_orig_filtered"
#   medqa_g2b    -> CSV column "medqa_4options_g2b"
#   medmcqa_og   -> CSV column "medmcqa_orig_filtered"
#   medmcqa_g2b  -> CSV column "medmcqa_g2b"

gpt4 = {
    'b4bqa': 0.94921875,
    'medqa_og': 0.9232804232804233,
    'medqa_g2b': 0.8994708994708994,
    'medmcqa_og': 0.9166666666666666,
    'medmcqa_g2b': 0.8879310344827587,
}

gpt4o = {
    'b4bqa': 0.96484375,
    'medqa_og': 0.9021164021164021,
    'medqa_g2b': 0.8835978835978836,
    'medmcqa_og': 0.9051724137931034,
    'medmcqa_g2b': 0.8649425287356322,
}

# NOTE(review): the medqa/medmcqa scores recorded here for GPT-3.5 Turbo are
# higher than those of GPT-4 and GPT-4o above -- confirm against the raw
# evaluation outputs before relying on them.
gpt35turbo = {
    'b4bqa': 0.9174107142857143,
    'medqa_og': 0.9629629629629629,
    'medqa_g2b': 0.9603174603174603,
    'medmcqa_og': 0.9827586206896551,
    'medmcqa_g2b': 0.9770114942528736,
}

claude_opus = {
    'b4bqa': 0.921875,
    'medqa_og': 0.8571428571428571,
    'medqa_g2b': 0.8333333333333334,
    'medmcqa_og': 0.8649425287356322,
    'medmcqa_g2b': 0.7988505747126436,
}
|
data/csv/models_data.csv
CHANGED
@@ -12,9 +12,13 @@ T,Model,b4bqa,b4b,medmcqa_g2b,medmcqa_orig_filtered,medmcqa_diff,medqa_4options_
|
|
12 |
🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-1</a>",19.64,21.18,24.14,25.86,-1.72,21.69,20.9,0.79
|
13 |
🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-2</a>",47.49,44.79,37.64,42.24,-4.6,41.8,43.92,-2.12
|
14 |
💬,"<a target=""_blank"" href=""https://huggingface.co/microsoft/Phi-3-medium-4k-instruct"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-Phi-3-medium-4k-instruct</a>",69.98,65.94,60.34,72.41,-12.07,53.44,58.47,-5.03
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
🔶,"<a target=""_blank"" href=""https://huggingface.co/ProbeMedicalYonseiMAILab/medllama3-v20"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">ProbeMedicalYonseiMAILab-medllama3-v20</a>",71.93,74.75,65.23,80.17,-14.94,76.46,90.21,-13.75
|
19 |
🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-72B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-72B</a>",91.02,83.72,71.55,77.87,-6.32,74.07,75.4,-1.33
|
20 |
🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-7B</a>",80.41,70.28,55.17,63.51,-8.34,53.7,58.99,-5.29
|
|
|
|
|
|
|
|
|
|
12 |
🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-1</a>",19.64,21.18,24.14,25.86,-1.72,21.69,20.9,0.79
|
13 |
🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-2</a>",47.49,44.79,37.64,42.24,-4.6,41.8,43.92,-2.12
|
14 |
💬,"<a target=""_blank"" href=""https://huggingface.co/microsoft/Phi-3-medium-4k-instruct"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-Phi-3-medium-4k-instruct</a>",69.98,65.94,60.34,72.41,-12.07,53.44,58.47,-5.03
|
15 |
+
🟢,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mistral-7B-v0.3"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mistral-7B-v0.3</a>",70.31,61.99,48.28,56.9,-8.62,48.68,53.17,-4.49
|
16 |
+
🟢,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mixtral-8x22B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mixtral-8x22B-v0.1</a>",87.72,78.82,61.78,70.4,-8.62,67.46,71.43,-3.97
|
17 |
+
🟢,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mixtral-8x7B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mixtral-8x7B-v0.1</a>",86.1,74.75,55.46,64.94,-9.48,60.05,62.43,-2.38
|
18 |
🔶,"<a target=""_blank"" href=""https://huggingface.co/ProbeMedicalYonseiMAILab/medllama3-v20"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">ProbeMedicalYonseiMAILab-medllama3-v20</a>",71.93,74.75,65.23,80.17,-14.94,76.46,90.21,-13.75
|
19 |
🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-72B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-72B</a>",91.02,83.72,71.55,77.87,-6.32,74.07,75.4,-1.33
|
20 |
🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-7B</a>",80.41,70.28,55.17,63.51,-8.34,53.7,58.99,-5.29
|
21 |
+
💬,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B-v2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-4</a>",94.92,,88.79,91.67,-2.88,89.95,92.33,-2.38
|
22 |
+
💬,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B-v2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-4o</a>",96.48,,86.49,90.52,-4.03,88.36,90.21,-1.85
|
23 |
+
💬,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B-v2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-3.5 Turbo</a>",91.74,,97.7,98.28,-0.58,96.03,96.3,-0.27
|
24 |
+
💬,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B-v2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Claude Opus</a>",92.19,,79.89,86.49,-6.6,83.33,85.71,-2.38
|
src/__pycache__/models_info.cpython-311.pyc
CHANGED
Binary files a/src/__pycache__/models_info.cpython-311.pyc and b/src/__pycache__/models_info.cpython-311.pyc differ
|
|
src/json2df.py
CHANGED
@@ -1,14 +1,23 @@
|
|
1 |
import os
|
2 |
import json
|
3 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
from models_info import model_info
|
5 |
|
6 |
directory = 'data/raw-eval-outputs'
|
7 |
data = []
|
8 |
|
|
|
9 |
def model_hyperlink(link, model_name):
    """Return an HTML anchor that opens ``link`` in a new tab, labelled ``model_name``.

    Both values are HTML-escaped before interpolation so that a URL containing
    ``&`` or ``"`` (or a name containing ``<`` or ``&``) cannot break out of
    the ``href`` attribute or the element text. Inputs without such characters
    produce exactly the same markup as before.

    Args:
        link: Target URL placed in the ``href`` attribute.
        model_name: Text displayed inside the anchor.

    Returns:
        The anchor markup as a string.
    """
    import html  # local import keeps this block self-contained

    href = html.escape(str(link), quote=True)
    label = html.escape(str(model_name), quote=False)
    return f'<a target="_blank" href="{href}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{label}</a>'
|
11 |
|
|
|
12 |
def make_clickable_names(df):
|
13 |
df["Model"] = df.apply(
|
14 |
lambda row: model_hyperlink(row["Link"], row["Model"]), axis=1
|
@@ -35,11 +44,33 @@ for filename in os.listdir(directory):
|
|
35 |
|
36 |
data.append(row)
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
df = pd.DataFrame(data)
|
40 |
df = make_clickable_names(df)
|
41 |
df.drop(columns=["Link"], inplace=True)
|
42 |
|
|
|
43 |
df['medmcqa_diff'] = (df['medmcqa_g2b'] - df['medmcqa_orig_filtered']).round(2)
|
44 |
df['medqa_diff'] = (df['medqa_4options_g2b'] - df['medqa_4options_orig_filtered']).round(2)
|
45 |
|
@@ -60,7 +91,7 @@ cols = [
|
|
60 |
]]
|
61 |
df = df[cols]
|
62 |
|
63 |
-
|
64 |
output_csv = 'data/csv/models_data.csv'
|
65 |
df.to_csv(output_csv, index=False)
|
66 |
|
|
|
1 |
import os
|
2 |
import json
|
3 |
import pandas as pd
|
4 |
+
import sys
|
5 |
+
|
6 |
+
# Make data/api-results/api_results.py importable. The directory name contains
# a hyphen, so it cannot be imported as a package; its path is added to
# sys.path instead. The path is derived from this file's location
# (src/json2df.py -> repository root) rather than the current working
# directory, so the import does not depend on where the script is launched.
_API_RESULTS_DIR = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    'data',
    'api-results',
)
if _API_RESULTS_DIR not in sys.path:
    sys.path.append(_API_RESULTS_DIR)

# Now import the API results
from api_results import gpt4, gpt4o, gpt35turbo, claude_opus
|
11 |
from models_info import model_info
|
12 |
|
13 |
directory = 'data/raw-eval-outputs'
|
14 |
data = []
|
15 |
|
16 |
+
# Function to create a clickable hyperlink for the model name
|
17 |
def model_hyperlink(link, model_name):
    """Return an HTML anchor that opens ``link`` in a new tab, labelled ``model_name``.

    Both values are HTML-escaped before interpolation so that a URL containing
    ``&`` or ``"`` (or a name containing ``<`` or ``&``) cannot break out of
    the ``href`` attribute or the element text. Inputs without such characters
    produce exactly the same markup as before.

    Args:
        link: Target URL placed in the ``href`` attribute.
        model_name: Text displayed inside the anchor.

    Returns:
        The anchor markup as a string.
    """
    import html  # local import keeps this block self-contained

    href = html.escape(str(link), quote=True)
    label = html.escape(str(model_name), quote=False)
    return f'<a target="_blank" href="{href}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{label}</a>'
|
19 |
|
20 |
+
# Function to apply the hyperlink creation function to the DataFrame
|
21 |
def make_clickable_names(df):
|
22 |
df["Model"] = df.apply(
|
23 |
lambda row: model_hyperlink(row["Link"], row["Model"]), axis=1
|
|
|
44 |
|
45 |
data.append(row)
|
46 |
|
47 |
+
# Prepare the API results for integration
|
48 |
+
api_models = {
    'GPT-4': gpt4,
    'GPT-4o': gpt4o,
    'GPT-3.5 Turbo': gpt35turbo,
    'Claude Opus': claude_opus,
}


def _to_percent(score):
    """Scale a fractional score to a percentage rounded to two decimals.

    ``None`` is passed through unchanged so that a benchmark that was not run
    stays a missing value instead of being reported as a real score.
    """
    if score is None:
        return None
    return round(score * 100, 2)


# Append one leaderboard row per API model, using the same column names as the
# rows already collected in ``data``. No 'b4b' value is supplied for these
# models.
# NOTE(review): the model_info entries for these four models all carry the
# link https://huggingface.co/Qwen/Qwen2-7B-v2, which looks like a copied
# placeholder -- confirm and replace with each model's real page.
for model_name, results in api_models.items():
    info = model_info[model_name]
    row = {
        'Model': model_name,
        # Previously a missing 'b4bqa' was silently recorded as 0.0, which is
        # indistinguishable from a genuine 0 % score; it is now left missing.
        'b4bqa': _to_percent(results.get('b4bqa')),
        'medmcqa_g2b': _to_percent(results['medmcqa_g2b']),
        'medmcqa_orig_filtered': _to_percent(results['medmcqa_og']),
        'medqa_4options_g2b': _to_percent(results['medqa_g2b']),
        'medqa_4options_orig_filtered': _to_percent(results['medqa_og']),
        'T': info['tuning'],
        'Link': info['link'],
    }
    data.append(row)
|
67 |
+
|
68 |
+
# Create DataFrame from the collected data
|
69 |
df = pd.DataFrame(data)
|
70 |
df = make_clickable_names(df)
|
71 |
df.drop(columns=["Link"], inplace=True)
|
72 |
|
73 |
+
# Calculate differences between specific evaluation metrics
|
74 |
df['medmcqa_diff'] = (df['medmcqa_g2b'] - df['medmcqa_orig_filtered']).round(2)
|
75 |
df['medqa_diff'] = (df['medqa_4options_g2b'] - df['medqa_4options_orig_filtered']).round(2)
|
76 |
|
|
|
91 |
]]
|
92 |
df = df[cols]
|
93 |
|
94 |
+
# Save DataFrame to CSV
|
95 |
output_csv = 'data/csv/models_data.csv'
|
96 |
df.to_csv(output_csv, index=False)
|
97 |
|
src/models_info.py
CHANGED
@@ -76,4 +76,20 @@ model_info = {
|
|
76 |
"link": "https://huggingface.co/Qwen/Qwen2-7B",
|
77 |
"tuning": "π’" # Pre-trained
|
78 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
}
|
|
|
76 |
"link": "https://huggingface.co/Qwen/Qwen2-7B",
|
77 |
"tuning": "π’" # Pre-trained
|
78 |
},
|
79 |
+
"GPT-4": {
|
80 |
+
"link": "https://huggingface.co/Qwen/Qwen2-7B-v2",
|
81 |
+
"tuning": "π¬"
|
82 |
+
},
|
83 |
+
"GPT-4o": {
|
84 |
+
"link": "https://huggingface.co/Qwen/Qwen2-7B-v2",
|
85 |
+
"tuning": "π¬"
|
86 |
+
},
|
87 |
+
"GPT-3.5 Turbo": {
|
88 |
+
"link": "https://huggingface.co/Qwen/Qwen2-7B-v2",
|
89 |
+
"tuning": "π¬"
|
90 |
+
},
|
91 |
+
"Claude Opus": {
|
92 |
+
"link": "https://huggingface.co/Qwen/Qwen2-7B-v2",
|
93 |
+
"tuning": "π¬"
|
94 |
+
}
|
95 |
}
|