magilogi committed on
Commit
be36629
·
1 Parent(s): 49b4a14

add api-results

Browse files
data/api-results/__pycache__/api_results.cpython-311.pyc ADDED
Binary file (647 Bytes). View file
 
data/api-results/api_results.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Corrected and cleaned per-benchmark scores for the API models, stored as fractions in [0, 1]
2
+ gpt4 = {
3
+ 'b4bqa': 0.94921875,
4
+ 'medqa_og': 0.9232804232804233,
5
+ 'medqa_g2b': 0.8994708994708994,
6
+ 'medmcqa_og': 0.9166666666666666,
7
+ 'medmcqa_g2b': 0.8879310344827587
8
+ }
9
+
10
+ gpt4o = {
11
+ 'b4bqa': 0.96484375,
12
+ 'medqa_og': 0.9021164021164021,
13
+ 'medqa_g2b': 0.8835978835978836,
14
+ 'medmcqa_og': 0.9051724137931034,
15
+ 'medmcqa_g2b': 0.8649425287356322
16
+ }
17
+
18
+ gpt35turbo = {
19
+ 'b4bqa': 0.9174107142857143,
20
+ 'medmcqa_og': 0.9827586206896551,
21
+ 'medmcqa_g2b': 0.9770114942528736,
22
+ 'medqa_og': 0.9629629629629629,
23
+ 'medqa_g2b': 0.9603174603174603
24
+ }
25
+
26
+ claude_opus = {
27
+ 'b4bqa': 0.921875,
28
+ 'medqa_og': 0.8571428571428571,
29
+ 'medqa_g2b': 0.8333333333333334,
30
+ 'medmcqa_og': 0.8649425287356322,
31
+ 'medmcqa_g2b': 0.7988505747126436
32
+ }
data/csv/models_data.csv CHANGED
@@ -12,9 +12,13 @@ T,Model,b4bqa,b4b,medmcqa_g2b,medmcqa_orig_filtered,medmcqa_diff,medqa_4options_
12
  🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-1</a>",19.64,21.18,24.14,25.86,-1.72,21.69,20.9,0.79
13
  🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-2</a>",47.49,44.79,37.64,42.24,-4.6,41.8,43.92,-2.12
14
  💬,"<a target=""_blank"" href=""https://huggingface.co/microsoft/Phi-3-medium-4k-instruct"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-Phi-3-medium-4k-instruct</a>",69.98,65.94,60.34,72.41,-12.07,53.44,58.47,-5.03
15
- 🟩,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mistral-7B-v0.3"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mistral-7B-v0.3</a>",70.31,61.99,48.28,56.9,-8.62,48.68,53.17,-4.49
16
- 🟩,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mixtral-8x22B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mixtral-8x22B-v0.1</a>",87.72,78.82,61.78,70.4,-8.62,67.46,71.43,-3.97
17
- 🟩,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mixtral-8x7B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mixtral-8x7B-v0.1</a>",86.1,74.75,55.46,64.94,-9.48,60.05,62.43,-2.38
18
  🔶,"<a target=""_blank"" href=""https://huggingface.co/ProbeMedicalYonseiMAILab/medllama3-v20"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">ProbeMedicalYonseiMAILab-medllama3-v20</a>",71.93,74.75,65.23,80.17,-14.94,76.46,90.21,-13.75
19
  🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-72B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-72B</a>",91.02,83.72,71.55,77.87,-6.32,74.07,75.4,-1.33
20
  🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-7B</a>",80.41,70.28,55.17,63.51,-8.34,53.7,58.99,-5.29
 
 
 
 
 
12
  🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-1</a>",19.64,21.18,24.14,25.86,-1.72,21.69,20.9,0.79
13
  🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-2</a>",47.49,44.79,37.64,42.24,-4.6,41.8,43.92,-2.12
14
  💬,"<a target=""_blank"" href=""https://huggingface.co/microsoft/Phi-3-medium-4k-instruct"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-Phi-3-medium-4k-instruct</a>",69.98,65.94,60.34,72.41,-12.07,53.44,58.47,-5.03
15
+ 🟢,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mistral-7B-v0.3"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mistral-7B-v0.3</a>",70.31,61.99,48.28,56.9,-8.62,48.68,53.17,-4.49
16
+ 🟢,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mixtral-8x22B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mixtral-8x22B-v0.1</a>",87.72,78.82,61.78,70.4,-8.62,67.46,71.43,-3.97
17
+ 🟢,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mixtral-8x7B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mixtral-8x7B-v0.1</a>",86.1,74.75,55.46,64.94,-9.48,60.05,62.43,-2.38
18
  🔶,"<a target=""_blank"" href=""https://huggingface.co/ProbeMedicalYonseiMAILab/medllama3-v20"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">ProbeMedicalYonseiMAILab-medllama3-v20</a>",71.93,74.75,65.23,80.17,-14.94,76.46,90.21,-13.75
19
  🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-72B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-72B</a>",91.02,83.72,71.55,77.87,-6.32,74.07,75.4,-1.33
20
  🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-7B</a>",80.41,70.28,55.17,63.51,-8.34,53.7,58.99,-5.29
21
+ 💬,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B-v2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-4</a>",94.92,,88.79,91.67,-2.88,89.95,92.33,-2.38
22
+ 💬,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B-v2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-4o</a>",96.48,,86.49,90.52,-4.03,88.36,90.21,-1.85
23
+ 💬,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B-v2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-3.5 Turbo</a>",91.74,,97.7,98.28,-0.58,96.03,96.3,-0.27
24
+ 💬,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B-v2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Claude Opus</a>",92.19,,79.89,86.49,-6.6,83.33,85.71,-2.38
src/__pycache__/models_info.cpython-311.pyc CHANGED
Binary files a/src/__pycache__/models_info.cpython-311.pyc and b/src/__pycache__/models_info.cpython-311.pyc differ
 
src/json2df.py CHANGED
@@ -1,14 +1,23 @@
1
  import os
2
  import json
3
  import pandas as pd
 
 
 
 
 
 
 
4
  from models_info import model_info
5
 
6
  directory = 'data/raw-eval-outputs'
7
  data = []
8
 
 
9
  def model_hyperlink(link, model_name):
10
  return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
11
 
 
12
  def make_clickable_names(df):
13
  df["Model"] = df.apply(
14
  lambda row: model_hyperlink(row["Link"], row["Model"]), axis=1
@@ -35,11 +44,33 @@ for filename in os.listdir(directory):
35
 
36
  data.append(row)
37
 
 
 
 
 
 
 
 
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  df = pd.DataFrame(data)
40
  df = make_clickable_names(df)
41
  df.drop(columns=["Link"], inplace=True)
42
 
 
43
  df['medmcqa_diff'] = (df['medmcqa_g2b'] - df['medmcqa_orig_filtered']).round(2)
44
  df['medqa_diff'] = (df['medqa_4options_g2b'] - df['medqa_4options_orig_filtered']).round(2)
45
 
@@ -60,7 +91,7 @@ cols = [
60
  ]]
61
  df = df[cols]
62
 
63
-
64
  output_csv = 'data/csv/models_data.csv'
65
  df.to_csv(output_csv, index=False)
66
 
 
1
  import os
2
  import json
3
  import pandas as pd
4
+ import sys
5
+
6
+ # Add the data/api-results directory to sys.path so that api_results.py can be imported
7
+ sys.path.append(os.path.abspath('data/api-results'))
8
+
9
+ # Now import the API results
10
+ from api_results import gpt4, gpt4o, gpt35turbo, claude_opus
11
  from models_info import model_info
12
 
13
  directory = 'data/raw-eval-outputs'
14
  data = []
15
 
16
+ # Function to create a clickable hyperlink for the model name
17
  def model_hyperlink(link, model_name):
18
  return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
19
 
20
+ # Function to apply the hyperlink creation function to the DataFrame
21
  def make_clickable_names(df):
22
  df["Model"] = df.apply(
23
  lambda row: model_hyperlink(row["Link"], row["Model"]), axis=1
 
44
 
45
  data.append(row)
46
 
47
+ # Prepare the API results for integration
48
+ api_models = {
49
+ 'GPT-4': gpt4,
50
+ 'GPT-4o': gpt4o,
51
+ 'GPT-3.5 Turbo': gpt35turbo,
52
+ 'Claude Opus': claude_opus
53
+ }
54
 
55
+ for model_name, results in api_models.items():
56
+ row = {
57
+ 'Model': model_name,
58
+ 'b4bqa': round(results.get('b4bqa', 0) * 100, 2),
59
+ 'medmcqa_g2b': round(results['medmcqa_g2b'] * 100, 2),
60
+ 'medmcqa_orig_filtered': round(results['medmcqa_og'] * 100, 2),
61
+ 'medqa_4options_g2b': round(results['medqa_g2b'] * 100, 2),
62
+ 'medqa_4options_orig_filtered': round(results['medqa_og'] * 100, 2),
63
+ 'T': model_info[model_name]['tuning'],
64
+ 'Link': model_info[model_name]['link']
65
+ }
66
+ data.append(row)
67
+
68
+ # Create DataFrame from the collected data
69
  df = pd.DataFrame(data)
70
  df = make_clickable_names(df)
71
  df.drop(columns=["Link"], inplace=True)
72
 
73
+ # Calculate differences between specific evaluation metrics
74
  df['medmcqa_diff'] = (df['medmcqa_g2b'] - df['medmcqa_orig_filtered']).round(2)
75
  df['medqa_diff'] = (df['medqa_4options_g2b'] - df['medqa_4options_orig_filtered']).round(2)
76
 
 
91
  ]]
92
  df = df[cols]
93
 
94
+ # Save DataFrame to CSV
95
  output_csv = 'data/csv/models_data.csv'
96
  df.to_csv(output_csv, index=False)
97
 
src/models_info.py CHANGED
@@ -76,4 +76,20 @@ model_info = {
76
  "link": "https://huggingface.co/Qwen/Qwen2-7B",
77
  "tuning": "🟒" # Pre-trained
78
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  }
 
76
  "link": "https://huggingface.co/Qwen/Qwen2-7B",
77
  "tuning": "🟒" # Pre-trained
78
  },
79
+ "GPT-4": {
80
+ "link": "https://huggingface.co/Qwen/Qwen2-7B-v2",
81
+ "tuning": "πŸ’¬"
82
+ },
83
+ "GPT-4o": {
84
+ "link": "https://huggingface.co/Qwen/Qwen2-7B-v2",
85
+ "tuning": "πŸ’¬"
86
+ },
87
+ "GPT-3.5 Turbo": {
88
+ "link": "https://huggingface.co/Qwen/Qwen2-7B-v2",
89
+ "tuning": "πŸ’¬"
90
+ },
91
+ "Claude Opus": {
92
+ "link": "https://huggingface.co/Qwen/Qwen2-7B-v2",
93
+ "tuning": "πŸ’¬"
94
+ }
95
  }