Spaces:
Sleeping
Sleeping
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions'] | |
import gradio as gr | |
import pandas as pd | |
import re | |
import os | |
import json | |
import yaml | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import plotnine as p9 | |
import sys | |
sys.path.append('./src') | |
sys.path.append('.') | |
from src.about import * | |
from src.saving_utils import * | |
from src.vis_utils import * | |
from src.bin.PROBE import run_probe | |
def add_new_eval(
    human_file,
    skempi_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    benchmark_types,
    similarity_tasks,
    function_prediction_aspect,
    function_prediction_dataset,
    family_prediction_dataset,
    save,
):
    """Run the PROBE benchmarks for a submitted representation.

    The representation is named after ``revision_name_textbox`` when that
    field is filled in and after ``model_name_textbox`` otherwise. Every
    other argument is forwarded unchanged to ``run_probe``.

    Args:
        human_file: Value of the "Human dataset" file input.
        skempi_file: Value of the "SKEMPI dataset" file input.
        model_name_textbox: Method name typed by the user.
        revision_name_textbox: Optional revision name; takes precedence over
            ``model_name_textbox`` when non-empty.
        benchmark_types: Selected benchmark types.
        similarity_tasks: Selected similarity tasks.
        function_prediction_aspect: Selected function-prediction aspect.
        function_prediction_dataset: Function-prediction dataset value.
        family_prediction_dataset: Selected family-prediction datasets.
        save: When truthy, the results are handed to ``save_results``.

    Returns:
        Always ``0``.
    """
    # Treat a missing revision name (None) the same as an empty string so the
    # method name is used instead of passing None on as the representation
    # name.
    if revision_name_textbox:
        representation_name = revision_name_textbox
    else:
        representation_name = model_name_textbox

    results = run_probe(
        benchmark_types,
        representation_name,
        human_file,
        skempi_file,
        similarity_tasks,
        function_prediction_aspect,
        function_prediction_dataset,
        family_prediction_dataset,
    )
    print(results)

    if save:
        save_results(representation_name, benchmark_types, results)
        print("Results are saved!")
    return 0
def refresh_data():
    """Drop the cached result files, re-download them and return the table.

    The cached ``/tmp/<type>_results.csv`` files for every benchmark type and
    for the combined leaderboard are deleted. The per-benchmark results are
    then fetched again through ``download_from_hub``.

    Returns:
        The unfiltered leaderboard from ``get_baseline_df(None, None)``. The
        Refresh button routes this function's return value into the
        leaderboard table, so returning nothing would blank that table.
        NOTE(review): assumes ``get_baseline_df`` picks up the freshly
        downloaded files - confirm.
    """
    result_types = ["similarity", "function", "family", "affinity"]

    # The combined leaderboard cache is deleted as well, but it is not part
    # of the list handed to download_from_hub.
    for cached_type in result_types + ["leaderboard"]:
        path = f"/tmp/{cached_type}_results.csv"
        try:
            os.remove(path)
        except FileNotFoundError:
            # Nothing cached for this type; there is nothing to delete.
            pass

    download_from_hub(result_types)
    return get_baseline_df(None, None)
# Define a function to update metrics based on benchmark type selection
def update_metrics(selected_benchmarks):
    """Collect the metrics that belong to the selected benchmark types.

    Looks each selected benchmark up in the module-level
    ``benchmark_metric_mapping`` and merges the metric lists. Duplicates are
    dropped and the result keeps the order in which metrics are first
    encountered, so the output is the same on every run.

    Args:
        selected_benchmarks: Iterable of benchmark type names; ``None`` or an
            empty selection yields an empty list. Names missing from the
            mapping contribute nothing.

    Returns:
        A list of unique metric names in first-seen order.
    """
    # A dict is used as an ordered set: keys stay in insertion order.
    ordered_metrics = {}
    for benchmark in selected_benchmarks or ():
        ordered_metrics.update(
            dict.fromkeys(benchmark_metric_mapping.get(benchmark, ()))
        )
    return list(ordered_metrics)
# Leaderboard refresh helper: thin wrapper around get_baseline_df
def update_leaderboard(selected_methods, selected_metrics):
    """Return what ``get_baseline_df`` produces for the given selections.

    Args:
        selected_methods: Methods chosen for the leaderboard.
        selected_metrics: Metrics chosen for the leaderboard.
    """
    return get_baseline_df(selected_methods, selected_metrics)
# Build the Gradio interface: a leaderboard tab (table + plots), an About tab
# and a submission tab, followed by a Refresh button and a citation box.
# NOTE(review): the reviewed text carried no indentation, so the nesting below
# is reconstructed from the order of the `with` statements - confirm against
# the deployed app.
block = gr.Blocks()
with block:
    gr.Markdown(LEADERBOARD_INTRODUCTION)
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 PROBE Leaderboard", elem_id="probe-benchmark-tab-table", id=1):
            # Get the baseline leaderboard without filtering; it supplies the
            # method names and the metric column names used by the selectors.
            leaderboard = get_baseline_df(None, None)
            method_names = leaderboard['Method'].unique().tolist()
            metric_names = leaderboard.columns.tolist()
            # Copy taken before 'Method' is removed; not referenced again in
            # the visible part of the file.
            metrics_with_method = metric_names.copy()
            metric_names.remove('Method')  # Remove method_name from the metric options
            # Group metric columns by benchmark type using their name prefix.
            # `with` does not open a new scope, so this is a module-level name
            # and is the mapping that update_metrics() reads.
            benchmark_metric_mapping = {
                "similarity": [metric for metric in metric_names if metric.startswith('sim_')],
                "function": [metric for metric in metric_names if metric.startswith('func')],
                "family": [metric for metric in metric_names if metric.startswith('fam_')],
                "affinity": [metric for metric in metric_names if metric.startswith('aff_')],
            }
            # Leaderboard section with method and metric selectors
            leaderboard_method_selector = gr.CheckboxGroup(
                choices=method_names, label="Select Methods for the Leaderboard", value=method_names, interactive=True
            )
            benchmark_type_selector = gr.CheckboxGroup(
                choices=list(benchmark_metric_mapping.keys()),
                label="Select Benchmark Types",
                value=None,  # No default selection is supplied
                interactive=True
            )
            leaderboard_metric_selector = gr.CheckboxGroup(
                choices=metric_names, label="Select Metrics for the Leaderboard", value=None, interactive=True
            )
            # Display the filtered leaderboard
            baseline_value = get_baseline_df(method_names, metric_names)
            baseline_header = ["Method"] + metric_names
            # First column ('Method') is rendered as markdown, every metric
            # column as a number.
            baseline_datatype = ['markdown'] + ['number'] * len(metric_names)
            with gr.Row(show_progress=True, variant='panel'):
                data_component = gr.components.Dataframe(
                    value=baseline_value,
                    headers=baseline_header,
                    type="pandas",
                    datatype=baseline_datatype,
                    interactive=False,
                    visible=True,
                )
            # Update leaderboard when method/metric selection changes
            leaderboard_method_selector.change(
                get_baseline_df,
                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                outputs=data_component
            )
            # Update metrics when benchmark type changes: the list returned by
            # update_metrics is sent to the metric checkbox group.
            benchmark_type_selector.change(
                lambda selected_benchmarks: update_metrics(selected_benchmarks),
                inputs=[benchmark_type_selector],
                outputs=leaderboard_metric_selector
            )
            leaderboard_metric_selector.change(
                get_baseline_df,
                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                outputs=data_component
            )
            with gr.Row():
                gr.Markdown(
                    """
## **Below, you can visualize the results displayed in the Leaderboard.**
### Once you choose a benchmark type, the related options for metrics, datasets, and other parameters will become visible. Select the methods and metrics of interest from the options to generate visualizations.
"""
                )
            # Dropdown for benchmark type
            # NOTE: this rebinds `benchmark_type_selector`. The CheckboxGroup
            # created earlier keeps its .change handler because that handler
            # was registered before the name was reused; from here on the name
            # refers to the plotting dropdown.
            # `benchmark_specific_metrics` is not defined in the visible part
            # of the file - presumably supplied by the star imports from src.
            benchmark_type_selector = gr.Dropdown(choices=list(benchmark_specific_metrics.keys()), label="Select Benchmark Type", value=None)
            with gr.Row():
                # Dynamic selectors: created hidden and with no choices; they
                # are the outputs of update_metric_choices below.
                x_metric_selector = gr.Dropdown(choices=[], label="Select X-axis Metric", visible=False)
                y_metric_selector = gr.Dropdown(choices=[], label="Select Y-axis Metric", visible=False)
                aspect_type_selector = gr.Dropdown(choices=[], label="Select Aspect Type", visible=False)
                dataset_selector = gr.Dropdown(choices=[], label="Select Dataset", visible=False)
                single_metric_selector = gr.Dropdown(choices=[], label="Select Metric", visible=False)
            method_selector = gr.CheckboxGroup(choices=method_names, label="Select methods to visualize", interactive=True, value=method_names)
            # Button to draw the plot for the selected benchmark
            plot_button = gr.Button("Plot")
            with gr.Row(show_progress=True, variant='panel'):
                plot_output = gr.Image(label="Plot")
            # Update selectors when benchmark type changes
            benchmark_type_selector.change(
                update_metric_choices,
                inputs=[benchmark_type_selector],
                outputs=[x_metric_selector, y_metric_selector, aspect_type_selector, dataset_selector, single_metric_selector]
            )
            # Draw the plot: benchmark_plot receives the benchmark type, the
            # chosen methods and all five dynamic selectors, in this order.
            plot_button.click(
                benchmark_plot,
                inputs=[benchmark_type_selector, method_selector, x_metric_selector, y_metric_selector, aspect_type_selector, dataset_selector, single_metric_selector],
                outputs=plot_output
            )
        with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
        with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
            with gr.Row():
                gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")
            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Method name",
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Method Name",
                    )
                    benchmark_types = gr.CheckboxGroup(
                        choices=TASK_INFO,
                        label="Benchmark Types",
                        interactive=True,
                    )
                    similarity_tasks = gr.CheckboxGroup(
                        choices=similarity_tasks_options,
                        label="Similarity Tasks",
                        interactive=True,
                    )
                    function_prediction_aspect = gr.Radio(
                        choices=function_prediction_aspect_options,
                        label="Function Prediction Aspects",
                        interactive=True,
                    )
                    family_prediction_dataset = gr.CheckboxGroup(
                        choices=family_prediction_dataset_options,
                        label="Family Prediction Datasets",
                        interactive=True,
                    )
                    # Hidden field with a fixed value; it is what gets passed
                    # as add_new_eval's function_prediction_dataset argument.
                    function_dataset = gr.Textbox(
                        label="Function Prediction Datasets",
                        visible=False,
                        value="All_Data_Sets"
                    )
                    save_checkbox = gr.Checkbox(
                        label="Save results for leaderboard and visualization",
                        value=True
                    )
                #with gr.Column():
            with gr.Row():
                # type='filepath': the handler receives file paths.
                human_file = gr.components.File(label="The representation file (csv) for Human dataset", file_count="single", type='filepath')
                skempi_file = gr.components.File(label="The representation file (csv) for SKEMPI dataset", file_count="single", type='filepath')
            submit_button = gr.Button("Submit Eval")
            # Created but not used as an output of any handler in the visible
            # part of the file.
            submission_result = gr.Markdown()
            # The inputs are listed in the same order as add_new_eval's
            # parameters. No outputs are declared for this handler.
            submit_button.click(
                add_new_eval,
                inputs=[
                    human_file,
                    skempi_file,
                    model_name_textbox,
                    revision_name_textbox,
                    benchmark_types,
                    similarity_tasks,
                    function_prediction_aspect,
                    function_dataset,
                    family_prediction_dataset,
                    save_checkbox,
                ],
            )
    with gr.Row():
        data_run = gr.Button("Refresh")
        # The value returned by refresh_data is routed to data_component (the
        # leaderboard table).
        data_run.click(refresh_data, outputs=[data_component])
    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )
# Start the app as soon as the module is executed.
block.launch()