"""Gradio front-end for the PROBE benchmark leaderboard.

Builds a three-tab UI (leaderboard + plot, about page, submission form)
backed by the results CSV at ``CSV_RESULT_PATH`` and launches it when the
module is executed.
"""

# NOTE(review): only `block` is defined in this file; the other exported names
# must be provided by `from src.about import *` for a star-import of this
# module to succeed -- confirm.
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']

import gradio as gr
import pandas as pd
import re
import os
import json
import tempfile
import yaml
import matplotlib.pyplot as plt
import seaborn as sns

# Star import supplies the text constants, option lists and CSV_RESULT_PATH
# referenced below (presumably -- none of them are defined in this file).
from src.about import *
from src.bin.PROBE import run_probe


def get_baseline_df(selected_methods, selected_metrics) -> pd.DataFrame:
    """Return the leaderboard rows/columns matching the current selection.

    Args:
        selected_methods: Values of ``method_name`` to keep.
        selected_metrics: List of metric column names to show.

    Returns:
        A DataFrame with ``method_name`` as the first column followed by the
        selected metric columns, restricted to the selected methods.
    """
    results = pd.read_csv(CSV_RESULT_PATH)
    columns = ["method_name"] + selected_metrics
    keep_rows = results['method_name'].isin(selected_methods)
    return results.loc[keep_rows, columns]


def create_plot(methods_selected, x_metric, y_metric) -> str:
    """Render a line plot of ``y_metric`` against ``x_metric`` and return its path.

    Args:
        methods_selected: Values of ``method_name`` to include in the plot.
        x_metric: Column name used for the X axis.
        y_metric: Column name used for the Y axis.

    Returns:
        Filesystem path of the saved PNG image.

    Raises:
        gr.Error: If either axis has not been selected in the UI.
    """
    # The axis dropdowns have no default value, so a click on "Plot" before
    # choosing both would otherwise hand ``None`` to seaborn.
    if x_metric is None or y_metric is None:
        raise gr.Error("Please select both an X-axis and a Y-axis metric before plotting.")

    results = pd.read_csv(CSV_RESULT_PATH)
    plot_data = results[results['method_name'].isin(methods_selected)]

    # Use explicit figure/axes handles instead of pyplot's implicit "current
    # figure", which is shared process-wide.
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        sns.lineplot(
            data=plot_data,
            x=x_metric,
            y=y_metric,
            hue="method_name",  # one colour per method
            marker="o",
            ax=ax,
        )
        ax.set_xlabel(x_metric)
        ax.set_ylabel(y_metric)
        ax.set_title(f'{y_metric} vs {x_metric} for selected methods')
        ax.grid(True)

        # A unique file per request: a fixed "plot.png" would be overwritten
        # by any other user plotting at the same time.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            plot_path = tmp.name
        fig.savefig(plot_path)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

    return plot_path


def add_new_eval(
    human_file,
    skempi_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    benchmark_type,
    similarity_tasks,
    function_prediction_aspect,
    function_prediction_dataset,
    family_prediction_dataset,
):
    """Forward a submission from the form to ``run_probe``.

    The representation name is the revision name when one was entered,
    otherwise the model name. All other arguments are passed through
    unchanged.

    Returns:
        None. The value returned by ``run_probe`` is discarded.
    """
    representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
    # NOTE(review): the result is not surfaced to the user; the
    # `submission_result` Markdown component below is never used as an output.
    run_probe(
        benchmark_type,
        representation_name,
        human_file,
        skempi_file,
        similarity_tasks,
        function_prediction_aspect,
        function_prediction_dataset,
        family_prediction_dataset,
    )
    return None


def update_leaderboard(selected_methods, selected_metrics) -> pd.DataFrame:
    """Event handler: recompute the leaderboard table for the new selection."""
    return get_baseline_df(selected_methods, selected_metrics)


block = gr.Blocks()

with block:
    gr.Markdown(LEADERBOARD_INTRODUCTION)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab 1: leaderboard table and metric-vs-metric plot
        with gr.TabItem("🏅 PROBE Benchmark", elem_id="probe-benchmark-tab-table", id=1):
            # Read the results file once for both the method list and the
            # column list.
            results_df = pd.read_csv(CSV_RESULT_PATH)
            method_names = results_df['method_name'].unique().tolist()
            metric_names = results_df.columns.tolist()
            # The plot axes may use any column, including method_name ...
            metrics_with_method = metric_names.copy()
            # ... but the leaderboard metric selector must not offer it.
            metric_names.remove('method_name')

            # Leaderboard section with method and metric selectors
            with gr.Row():
                leaderboard_method_selector = gr.CheckboxGroup(
                    choices=method_names,
                    label="Select method_names for Leaderboard",
                    value=method_names,
                    interactive=True,
                )
                leaderboard_metric_selector = gr.CheckboxGroup(
                    choices=metric_names,
                    label="Select Metrics for Leaderboard",
                    value=metric_names,
                    interactive=True,
                )

            # Display the filtered leaderboard (initially: everything)
            baseline_value = get_baseline_df(method_names, metric_names)
            baseline_header = ["method_name"] + metric_names
            baseline_datatype = ['markdown'] + ['number'] * len(metric_names)

            data_component = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )

            # Update leaderboard when either the method or the metric
            # selection changes
            leaderboard_method_selector.change(
                update_leaderboard,
                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                outputs=data_component,
            )
            leaderboard_metric_selector.change(
                update_leaderboard,
                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                outputs=data_component,
            )

            # Visualizer: narrow control column next to a wide plot column
            with gr.Row():
                with gr.Column(scale=1):
                    method_selector = gr.CheckboxGroup(
                        choices=method_names,
                        label="Select method_names",
                        interactive=True,
                        value=method_names,
                    )
                    x_metric_selector = gr.Dropdown(
                        choices=metrics_with_method,
                        label="Select X-axis Metric",
                        interactive=True,
                    )
                    y_metric_selector = gr.Dropdown(
                        choices=metrics_with_method,
                        label="Select Y-axis Metric",
                        interactive=True,
                    )
                    plot_button = gr.Button("Plot")

                with gr.Column(scale=3):
                    output_plot = gr.Image(label="Plot", height=735)

            plot_button.click(
                create_plot,
                inputs=[method_selector, x_metric_selector, y_metric_selector],
                outputs=output_plot,
            )

        # Tab 2: static description of the benchmark
        with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        # Tab 3: submission form
        with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
            with gr.Row():
                gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name",
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name",
                    )

                    benchmark_type = gr.CheckboxGroup(
                        choices=TASK_INFO,
                        label="Benchmark Type",
                        interactive=True,
                    )
                    similarity_tasks = gr.CheckboxGroup(
                        choices=similarity_tasks_options,
                        label="Select Similarity Tasks",
                        interactive=True,
                    )

                    function_prediction_aspect = gr.Radio(
                        choices=function_prediction_aspect_options,
                        label="Select Function Prediction Aspect",
                        interactive=True,
                    )

                    function_prediction_dataset = gr.Radio(
                        choices=function_prediction_dataset_options,
                        label="Select Function Prediction Dataset",
                        interactive=True,
                    )

                    family_prediction_dataset = gr.CheckboxGroup(
                        choices=family_prediction_dataset_options,
                        label="Select Family Prediction Dataset",
                        interactive=True,
                    )

                with gr.Column():
                    human_file = gr.components.File(
                        label="Click to Upload the representation file (csv) for Human dataset",
                        file_count="single",
                        type='filepath',
                    )
                    skempi_file = gr.components.File(
                        label="Click to Upload the representation file (csv) for SKEMPI dataset",
                        file_count="single",
                        type='filepath',
                    )

                    submit_button = gr.Button("Submit Eval")
                    submission_result = gr.Markdown()
                    submit_button.click(
                        add_new_eval,
                        inputs=[
                            human_file,
                            skempi_file,
                            model_name_textbox,
                            revision_name_textbox,
                            benchmark_type,
                            similarity_tasks,
                            function_prediction_aspect,
                            function_prediction_dataset,
                            family_prediction_dataset,
                        ],
                    )

    def refresh_data():
        """Reload the full (unfiltered) leaderboard from the results CSV."""
        value = get_baseline_df(method_names, metric_names)
        return value

    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(refresh_data, outputs=[data_component])

    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )

block.launch()