|
import os |
|
import json |
|
import pandas as pd |
|
import sys |
|
|
|
|
|
sys.path.append(os.path.abspath('data/api-results')) |
|
|
|
|
|
from api_results import gpt4, gpt4o, gpt35turbo, claude_opus |
|
from models_info import model_info |
|
|
|
# Folder scanned for the per-model ``*.json`` evaluation result files.
directory = 'data/raw-eval-outputs'

# One dict per model is appended here; the list later becomes the DataFrame.
data = []
|
|
|
|
|
def model_hyperlink(link, model_name):
    """Return an HTML anchor that displays ``model_name`` and points at ``link``.

    The anchor opens in a new tab (``target="_blank"``) and carries an inline
    style with a dotted underline. Both arguments are interpolated verbatim;
    no HTML escaping is applied.
    """
    anchor = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
    return anchor
|
|
|
|
|
def make_clickable_names(df):
    """Overwrite the ``Model`` column with HTML links built from each row.

    For every row, the new "Model" value is ``model_hyperlink`` applied to
    that row's "Link" and "Model" values. The frame is modified in place and
    the same frame object is returned.
    """
    def _linked_name(record):
        # ``record`` is a single row of the frame (apply with axis=1).
        return model_hyperlink(record["Link"], record["Model"])

    df["Model"] = df.apply(_linked_name, axis=1)
    return df
|
|
|
|
|
# Build one row per ``*.json`` file found in ``directory``.
# os.listdir() returns entries in arbitrary order, so the listing is sorted
# to make the row order (and therefore the written CSV) reproducible.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".json"):
        continue

    filepath = os.path.join(directory, filename)
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(filepath, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Strip the "_results.json" part of the file name; what remains is used
    # both as the displayed model name and as the key into ``model_info``.
    model_name = filename.replace("_results.json", "")

    results = json_data['results']
    row = {'Model': model_name}
    for key, value in results.items():
        # Each entry's 'acc,none' value is scaled by 100 and rounded to two
        # decimals (presumably a 0-1 accuracy turned into a percentage).
        row[key] = round(value['acc,none'] * 100, 2)

    # Per-model metadata; raises KeyError if the model is not in model_info.
    row['T'] = model_info[model_name]['tuning']
    row['Link'] = model_info[model_name]['link']

    data.append(row)
|
|
|
|
|
# Result objects for the API-served models, keyed by the name shown in the
# table (the same name is used to look up ``model_info``).
api_models = {
    'GPT-4': gpt4,
    'GPT-4o': gpt4o,
    'GPT-3.5 Turbo': gpt35turbo,
    'Claude Opus': claude_opus,
}

# (row column, key in the API result object) for the metrics read by plain
# indexing. The order here is the order the keys are inserted into each row.
_api_metric_keys = (
    ('medmcqa_g2b', 'medmcqa_g2b'),
    ('medmcqa_orig_filtered', 'medmcqa_og'),
    ('medqa_4options_g2b', 'medqa_g2b'),
    ('medqa_4options_orig_filtered', 'medqa_og'),
)

for model_name, results in api_models.items():
    # 'b4bqa' is the only metric read with a fallback: a missing value is
    # treated as 0. Every other metric must be present.
    row = {
        'Model': model_name,
        'b4bqa': round(results.get('b4bqa', 0) * 100, 2),
    }
    for _column, _source_key in _api_metric_keys:
        # Scale by 100 and round to two decimals, as for the file-based rows.
        row[_column] = round(results[_source_key] * 100, 2)

    row['T'] = model_info[model_name]['tuning']
    row['Link'] = model_info[model_name]['link']

    data.append(row)
|
|
|
|
|
# Turn the collected rows into a table, replace model names with HTML links,
# then drop the "Link" column, which is only needed to build those links.
df = pd.DataFrame(data)
df = make_clickable_names(df)
df.drop(columns=["Link"], inplace=True)

# Per-benchmark difference: the "g2b" score minus the "orig_filtered" score.
df['medmcqa_diff'] = (df['medmcqa_g2b'] - df['medmcqa_orig_filtered']).round(2)
df['medqa_diff'] = (df['medqa_4options_g2b'] - df['medqa_4options_orig_filtered']).round(2)

# Preferred left-to-right order of the leading columns, declared once so the
# ordering list and the exclusion filter below cannot drift apart.
leading_cols = [
    "T",
    "Model",
    "b4bqa",
    "b4b",
    "medmcqa_g2b",
    "medmcqa_orig_filtered",
    "medmcqa_diff",
    "medqa_4options_g2b",
    "medqa_4options_orig_filtered",
    "medqa_diff",
]

# Selecting a label that is not a column raises KeyError, so keep only the
# leading columns that the collected rows actually produced (for example,
# "b4b" is absent when no result file reports it). Any remaining columns
# follow in their existing order.
cols = [col for col in leading_cols if col in df.columns] + [
    col for col in df.columns if col not in leading_cols
]
df = df[cols]

output_csv = 'data/csv/models_data.csv'
# to_csv does not create missing parent directories; make sure it exists.
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
df.to_csv(output_csv, index=False)

print(f"DataFrame saved to {output_csv}")
|
|