PROBE / app.py
mgyigit's picture
Update app.py
ec6bef2 verified
raw
history blame
9 kB
# Names exported by `from <this module> import *`.
# NOTE(review): only `block` is assigned in this file as shown; the other three
# names would have to be provided by `from src.about import *` -- confirm, since
# a name listed here but missing from the module makes a star-import fail.
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
import gradio as gr
import pandas as pd
import re
import os
import json
import yaml
import matplotlib.pyplot as plt
import seaborn as sns
from src.about import *
from src.bin.PROBE import run_probe
# NOTE: a `global` statement at module scope has no effect. `data_component` is
# assigned further down while the UI is built; `filter_component` is not
# assigned anywhere in this file as shown.
global data_component, filter_component
def get_baseline_df(selected_methods, selected_metrics):
    """Load the results CSV and keep only the requested methods and metrics.

    Args:
        selected_methods: ``method_name`` values whose rows are kept.
        selected_metrics: List of metric column names to keep.

    Returns:
        A DataFrame whose first column is ``method_name``, followed by the
        selected metric columns, restricted to the selected methods.
    """
    results = pd.read_csv(CSV_RESULT_PATH)
    wanted_columns = ["method_name"] + selected_metrics
    row_mask = results['method_name'].isin(selected_methods)
    # Row filter and column projection in one step.
    return results.loc[row_mask, wanted_columns]
def create_plot(methods_selected, x_metric, y_metric):
    """Draw ``y_metric`` against ``x_metric`` for the chosen methods and save it as a PNG.

    Args:
        methods_selected: ``method_name`` values to include in the plot.
        x_metric: Name of the results column plotted on the x-axis.
        y_metric: Name of the results column plotted on the y-axis.

    Returns:
        Filesystem path of the saved PNG image.

    Raises:
        gr.Error: If either axis metric has not been selected.
    """
    import tempfile

    # The axis dropdowns start without a value; fail with a readable message
    # instead of letting seaborn raise on x=None / y=None.
    if not x_metric or not y_metric:
        raise gr.Error("Please select both an X-axis and a Y-axis metric before plotting.")

    df = pd.read_csv(CSV_RESULT_PATH)
    filtered_df = df[df['method_name'].isin(methods_selected or [])]

    # Explicit figure/axes so nothing depends on pyplot's global "current figure".
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        sns.lineplot(
            data=filtered_df,
            x=x_metric,
            y=y_metric,
            hue="method_name",  # one colour per method
            marker="o",
            ax=ax,
        )
        ax.set_xlabel(x_metric)
        ax.set_ylabel(y_metric)
        ax.set_title(f'{y_metric} vs {x_metric} for selected methods')
        ax.grid(True)

        # A unique file per call: a fixed "plot.png" would be overwritten when
        # two sessions plot at the same time.
        with tempfile.NamedTemporaryFile(prefix="probe_plot_", suffix=".png", delete=False) as tmp:
            plot_path = tmp.name
        fig.savefig(plot_path)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

    return plot_path
def add_new_eval(
    human_file,
    skempi_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    benchmark_type,
    similarity_tasks,
    function_prediction_aspect,
    function_prediction_dataset,
    family_prediction_dataset,
):
    """Run the PROBE benchmark for a submitted representation.

    The representation is named after the revision name when one is given,
    otherwise after the model name. Whatever ``run_probe`` returns is
    discarded; this function always returns ``None``.

    Args:
        human_file: Value of the "Human dataset" file upload.
        skempi_file: Value of the "SKEMPI dataset" file upload.
        model_name_textbox: Model name entered by the user.
        revision_name_textbox: Revision name entered by the user ('' if none).
        benchmark_type: Selected benchmark types.
        similarity_tasks: Selected similarity tasks.
        function_prediction_aspect: Selected function-prediction aspect.
        function_prediction_dataset: Selected function-prediction dataset.
        family_prediction_dataset: Selected family-prediction datasets.

    Returns:
        None.
    """
    if revision_name_textbox == '':
        representation_name = model_name_textbox
    else:
        representation_name = revision_name_textbox

    # Arguments are passed positionally, in the order run_probe is called with
    # in the original code.
    run_probe(
        benchmark_type,
        representation_name,
        human_file,
        skempi_file,
        similarity_tasks,
        function_prediction_aspect,
        function_prediction_dataset,
        family_prediction_dataset,
    )
    return None
def update_leaderboard(selected_methods, selected_metrics):
    """Rebuild the leaderboard table for the current method/metric selection.

    Thin wrapper around ``get_baseline_df``.

    Args:
        selected_methods: ``method_name`` values to show.
        selected_metrics: Metric column names to show.

    Returns:
        The filtered DataFrame produced by ``get_baseline_df``.
    """
    refreshed_table = get_baseline_df(selected_methods, selected_metrics)
    return refreshed_table
# ---------------------------------------------------------------------------
# Gradio UI: three tabs (leaderboard + visualizer, about, submission), followed
# by a refresh button and a citation box. The app is launched at import time.
# ---------------------------------------------------------------------------
block = gr.Blocks()

with block:
    gr.Markdown(LEADERBOARD_INTRODUCTION)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # --- Tab 1: leaderboard table and metric visualizer ----------------
        # NOTE(review): all three tabs use the same elem_id; left unchanged
        # here, but element ids are normally expected to be unique.
        with gr.TabItem("🏅 PROBE Benchmark", elem_id="probe-benchmark-tab-table", id=1):
            # Read the results file once and derive both option lists from it.
            results_df = pd.read_csv(CSV_RESULT_PATH)
            method_names = results_df['method_name'].unique().tolist()
            metric_names = results_df.columns.tolist()
            # The plot axes may use any column, including method_name.
            metrics_with_method = metric_names.copy()
            metric_names.remove('method_name')  # Remove method_name from the metric options

            # Method and metric selectors for the leaderboard table.
            with gr.Row():
                leaderboard_method_selector = gr.CheckboxGroup(
                    choices=method_names,
                    label="Select method_names for Leaderboard",
                    value=method_names,
                    interactive=True,
                )
                leaderboard_metric_selector = gr.CheckboxGroup(
                    choices=metric_names,
                    label="Select Metrics for Leaderboard",
                    value=metric_names,
                    interactive=True,
                )

            # Initial table: every method and every metric.
            baseline_value = get_baseline_df(method_names, metric_names)
            baseline_header = ["method_name"] + metric_names
            baseline_datatype = ['markdown'] + ['number'] * len(metric_names)

            data_component = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )

            # Re-filter the table whenever either selector changes.
            leaderboard_method_selector.change(
                update_leaderboard,
                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                outputs=data_component,
            )
            leaderboard_metric_selector.change(
                update_leaderboard,
                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                outputs=data_component,
            )

            # Visualizer: narrow control column on the left, plot on the right.
            with gr.Row():
                with gr.Column(scale=1):
                    method_selector = gr.CheckboxGroup(choices=method_names, label="Select method_names", interactive=True, value=method_names)
                    x_metric_selector = gr.Dropdown(choices=metrics_with_method, label="Select X-axis Metric", interactive=True)
                    y_metric_selector = gr.Dropdown(choices=metrics_with_method, label="Select Y-axis Metric", interactive=True)
                    plot_button = gr.Button("Plot")

                with gr.Column(scale=3):
                    output_plot = gr.Image(label="Plot", height=735)  # Set larger height for the plot

            plot_button.click(create_plot, inputs=[method_selector, x_metric_selector, y_metric_selector], outputs=output_plot)

        # --- Tab 2: static description of the benchmark ---------------------
        with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        # --- Tab 3: submission form -----------------------------------------
        with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
            with gr.Row():
                gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")

            with gr.Row():
                # Left column: names and benchmark options.
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name",
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name",
                    )

                    benchmark_type = gr.CheckboxGroup(
                        choices=TASK_INFO,
                        label="Benchmark Type",
                        interactive=True,
                    )
                    similarity_tasks = gr.CheckboxGroup(
                        choices=similarity_tasks_options,
                        label="Select Similarity Tasks",
                        interactive=True,
                    )
                    function_prediction_aspect = gr.Radio(
                        choices=function_prediction_aspect_options,
                        label="Select Function Prediction Aspect",
                        interactive=True,
                    )
                    function_prediction_dataset = gr.Radio(
                        choices=function_prediction_dataset_options,
                        label="Select Function Prediction Dataset",
                        interactive=True,
                    )
                    family_prediction_dataset = gr.CheckboxGroup(
                        choices=family_prediction_dataset_options,
                        label="Select Family Prediction Dataset",
                        interactive=True,
                    )

                # Right column: file uploads and the submit button.
                with gr.Column():
                    human_file = gr.components.File(label="Click to Upload the representation file (csv) for Human dataset", file_count="single", type='filepath')
                    skempi_file = gr.components.File(label="Click to Upload the representation file (csv) for SKEMPI dataset", file_count="single", type='filepath')

                    submit_button = gr.Button("Submit Eval")
                    # NOTE(review): this Markdown is not wired as an output of
                    # the click handler below, so it stays empty.
                    submission_result = gr.Markdown()
                    submit_button.click(
                        add_new_eval,
                        inputs=[
                            human_file,
                            skempi_file,
                            model_name_textbox,
                            revision_name_textbox,
                            benchmark_type,
                            similarity_tasks,
                            function_prediction_aspect,
                            function_prediction_dataset,
                            family_prediction_dataset,
                        ],
                    )

    def refresh_data():
        """Reload the leaderboard with every method and metric.

        Uses the method/metric lists captured when the UI was built, not the
        current state of the selectors.
        """
        value = get_baseline_df(method_names, metric_names)
        return value

    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(refresh_data, outputs=[data_component])

    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )

block.launch()