File size: 3,253 Bytes
4c59875
 
 
be36629
 
 
 
 
 
 
4c59875
 
 
 
 
be36629
4c59875
 
 
be36629
4c59875
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be36629
 
 
 
 
 
 
4c59875
be36629
 
 
 
 
 
 
 
 
 
 
 
 
 
4c59875
 
 
 
be36629
4c59875
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be36629
4c59875
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import os
import json
import pandas as pd
import sys

# Make the modules in data/api-results (e.g. api_results.py) importable
sys.path.append(os.path.abspath('data/api-results'))

# Now import the API results
from api_results import gpt4, gpt4o, gpt35turbo, claude_opus
from models_info import model_info

# Directory scanned below for per-model evaluation result files (*.json).
directory = 'data/raw-eval-outputs'
# Accumulates one dict per model; the list is later turned into a DataFrame.
data = []

# Function to create a clickable hyperlink for the model name
def model_hyperlink(link, model_name):
    """Return an HTML anchor that shows *model_name* and points to *link*.

    The anchor opens in a new tab and is styled with a dotted underline.
    Neither argument is escaped; both are interpolated as given.
    """
    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    opening_tag = f'<a target="_blank" href="{link}" style="{anchor_style}">'
    return opening_tag + f'{model_name}</a>'

# Function to apply the hyperlink creation function to the DataFrame
def make_clickable_names(df):
    """Replace each "Model" value with a hyperlink built from the row's "Link".

    The frame is modified in place (its "Model" column is overwritten) and
    the same object is returned for convenience.
    """
    def _linked_name(record):
        # One row in, one HTML anchor out.
        return model_hyperlink(record["Link"], record["Model"])

    df["Model"] = df.apply(_linked_name, axis=1)
    return df

# Build one row per evaluation result file in the directory.
# The listing is sorted because os.listdir() returns entries in arbitrary
# order, which would otherwise make the row order of the output vary from
# run to run.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".json"):
        continue

    filepath = os.path.join(directory, filename)
    # Read as UTF-8 explicitly instead of relying on the locale's default
    # encoding, which differs between platforms.
    with open(filepath, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # The model name is the file name with "_results.json" removed.
    model_name = filename.replace("_results.json", "")

    # Store every entry's 'acc,none' value as a percentage with two decimals.
    row = {'Model': model_name}
    for key, value in json_data['results'].items():
        row[key] = round(value['acc,none'] * 100, 2)

    # Add the tuning type and link to the row; a model that is missing from
    # model_info raises KeyError here.
    row['T'] = model_info[model_name]['tuning']
    row['Link'] = model_info[model_name]['link']

    data.append(row)

# API-only models, keyed by the display name used in the table.
api_models = {
    'GPT-4': gpt4,
    'GPT-4o': gpt4o,
    'GPT-3.5 Turbo': gpt35turbo,
    'Claude Opus': claude_opus
}

# (table column, key in the API score dict) pairs for the required metrics.
api_metric_keys = (
    ('medmcqa_g2b', 'medmcqa_g2b'),
    ('medmcqa_orig_filtered', 'medmcqa_og'),
    ('medqa_4options_g2b', 'medqa_g2b'),
    ('medqa_4options_orig_filtered', 'medqa_og'),
)

# Append one row per API model, with scores converted to percentages.
for api_name, api_scores in api_models.items():
    api_row = {'Model': api_name}
    # 'b4bqa' is optional and counts as 0 when the score dict lacks it.
    api_row['b4bqa'] = round(api_scores.get('b4bqa', 0) * 100, 2)
    for column, score_key in api_metric_keys:
        api_row[column] = round(api_scores[score_key] * 100, 2)
    api_row['T'] = model_info[api_name]['tuning']
    api_row['Link'] = model_info[api_name]['link']
    data.append(api_row)

# Create DataFrame from the collected data and turn model names into links.
df = pd.DataFrame(data)
df = make_clickable_names(df)
# The link is now embedded in the "Model" cell, so the raw column is dropped.
df = df.drop(columns=["Link"])

# Differences between the "_g2b" and "_orig_filtered" scores of each benchmark.
df['medmcqa_diff'] = (df['medmcqa_g2b'] - df['medmcqa_orig_filtered']).round(2)
df['medqa_diff'] = (df['medqa_4options_g2b'] - df['medqa_4options_orig_filtered']).round(2)

# Leading columns in display order; every other column follows in the order
# it already has in the frame. The list is defined once so the ordering and
# the "everything else" filter cannot drift apart.
leading_cols = [
    "T",
    "Model",
    "b4bqa",
    "b4b",
    "medmcqa_g2b",
    "medmcqa_orig_filtered",
    "medmcqa_diff",
    "medqa_4options_g2b",
    "medqa_4options_orig_filtered",
    "medqa_diff",
]
cols = leading_cols + [col for col in df.columns if col not in leading_cols]
# reindex instead of df[cols]: a leading column that no input supplied
# (e.g. "b4b", which the API rows never set) becomes an empty column rather
# than raising KeyError. When every column exists the result is the same.
df = df.reindex(columns=cols)

# Save DataFrame to CSV, creating the output directory first if necessary.
output_csv = 'data/csv/models_data.csv'
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
df.to_csv(output_csv, index=False)

print(f"DataFrame saved to {output_csv}")