File size: 3,253 Bytes
4c59875 be36629 4c59875 be36629 4c59875 be36629 4c59875 be36629 4c59875 be36629 4c59875 be36629 4c59875 be36629 4c59875 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import os
import json
import pandas as pd
import sys
# Add the path to api-results.py
sys.path.append(os.path.abspath('data/api-results'))
# Now import the API results
from api_results import gpt4, gpt4o, gpt35turbo, claude_opus
from models_info import model_info
# Folder scanned below for per-model "*_results.json" evaluation dumps.
directory: str = "data/raw-eval-outputs"
# One dict per model is appended here; it becomes the DataFrame rows later on.
data: list = []
# Function to create a clickable hyperlink for the model name
def model_hyperlink(link: str, model_name: str) -> str:
    """Return an HTML anchor that shows *model_name* and points at *link*.

    The anchor opens in a new tab (``target="_blank"``) and carries an inline
    style (theme link colour, dotted underline).

    Args:
        link: URL placed in the ``href`` attribute.
        model_name: Text shown inside the anchor.

    Returns:
        The ``<a ...>...</a>`` markup as a string.

    Note:
        Both values are interpolated verbatim; no HTML escaping is applied,
        so callers must pass values that are already safe to embed.
    """
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
# Function to apply the hyperlink creation function to the DataFrame
def make_clickable_names(df):
    """Turn every entry of the "Model" column into an HTML link.

    For each row the link target is taken from the "Link" column and the
    visible text from the current "Model" value; the markup is produced by
    ``model_hyperlink``.

    Args:
        df: DataFrame containing "Model" and "Link" columns.

    Returns:
        The same DataFrame object. It is modified in place and returned for
        convenience.
    """
    # Build the new column positionally rather than via df.apply(axis=1):
    # on a frame with no rows, row-wise apply can hand back a DataFrame
    # instead of a Series, which cannot be assigned to a single column.
    df["Model"] = [
        model_hyperlink(link, name)
        for link, name in zip(df["Link"], df["Model"])
    ]
    return df
# Iterate over all the JSON files in the directory.
# sorted() pins the row order: os.listdir() returns entries in arbitrary
# order, which would otherwise let the generated CSV differ between runs on
# identical inputs.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".json"):
        continue

    filepath = os.path.join(directory, filename)
    # Decode explicitly as UTF-8 instead of relying on the locale default,
    # and keep the file open only for the duration of the parse.
    with open(filepath, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Dropping the "_results.json" part of the file name yields the key that
    # is looked up in model_info below.
    model_name = filename.replace("_results.json", "")

    # Extract the accuracy values: one column per entry of "results", taken
    # from its 'acc,none' field, scaled by 100 and rounded to two decimals
    # (presumably fraction -> percent; confirm against the eval output format).
    results = json_data['results']
    row = {'Model': model_name}
    for key, value in results.items():
        row[key] = round(value['acc,none'] * 100, 2)

    # Add the tuning type and link to the row.  A model missing from
    # model_info raises KeyError here.
    row['T'] = model_info[model_name]['tuning']
    row['Link'] = model_info[model_name]['link']

    data.append(row)
# Prepare the API results for integration: display name -> score mapping
# imported from api_results.
api_models = {
    'GPT-4': gpt4,
    'GPT-4o': gpt4o,
    'GPT-3.5 Turbo': gpt35turbo,
    'Claude Opus': claude_opus
}

# Append one row per API model.  The '*_og' source keys are stored under the
# '*_orig_filtered' column names that the diff computation further down reads.
for model_name, results in api_models.items():
    row = {
        'Model': model_name,
        # .get with a default: a model without a 'b4bqa' score is reported
        # as 0.0 instead of raising KeyError like the other metrics would.
        'b4bqa': round(results.get('b4bqa', 0) * 100, 2),
        'medmcqa_g2b': round(results['medmcqa_g2b'] * 100, 2),
        'medmcqa_orig_filtered': round(results['medmcqa_og'] * 100, 2),
        'medqa_4options_g2b': round(results['medqa_g2b'] * 100, 2),
        'medqa_4options_orig_filtered': round(results['medqa_og'] * 100, 2),
        'T': model_info[model_name]['tuning'],
        'Link': model_info[model_name]['link']
    }
    data.append(row)
# Create DataFrame from the collected data
df = pd.DataFrame(data)
# Replace plain model names with HTML links, then drop the helper column that
# only carried the link target.
df = make_clickable_names(df)
df.drop(columns=["Link"], inplace=True)

# Calculate differences between specific evaluation metrics
df['medmcqa_diff'] = (df['medmcqa_g2b'] - df['medmcqa_orig_filtered']).round(2)
df['medqa_diff'] = (df['medqa_4options_g2b'] - df['medqa_4options_orig_filtered']).round(2)

# Reorder columns: the fixed leading columns first, then every remaining
# column in its existing order.  The leading list is defined once so the
# ordering and the "everything else" filter cannot drift apart.
# NOTE: df[cols] raises KeyError if a leading column is absent; "b4b" is not
# set by the API rows, so it has to come from the JSON eval outputs.
leading_cols = [
    "T",
    "Model",
    "b4bqa",
    "b4b",
    "medmcqa_g2b",
    "medmcqa_orig_filtered",
    "medmcqa_diff",
    "medqa_4options_g2b",
    "medqa_4options_orig_filtered",
    "medqa_diff"
]
cols = leading_cols + [col for col in df.columns if col not in leading_cols]
df = df[cols]

# Save DataFrame to CSV
output_csv = 'data/csv/models_data.csv'
# to_csv does not create missing parent directories, so make sure it exists.
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
df.to_csv(output_csv, index=False)
print(f"DataFrame saved to {output_csv}")
|