PROBE

Sleeping

File size: 7,739 Bytes

# Names exported by a star-import of this module.
# NOTE(review): only `block` is defined in this file; the other three names
# would have to come from `from src.about import *` — verify, otherwise a
# star-import of this module fails on the missing names.
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']

import gradio as gr
import pandas as pd
import re
import os
import json
import yaml
import matplotlib.pyplot as plt

from src.about import *
from src.bin.PROBE import run_probe

# NOTE(review): a `global` statement at module level has no effect, and
# `filter_component` is never assigned anywhere in this file.
global data_component, filter_component

def get_baseline_df():
    """Load the results CSV and keep only the leaderboard columns.

    The returned frame contains the ``"Method"`` column followed by the
    columns listed in ``checkbox_group.value`` at the time of the call.

    Returns:
        A DataFrame restricted to those columns, in that order.
    """
    results = pd.read_csv(CSV_RESULT_PATH)
    wanted_columns = ["Method"] + checkbox_group.value
    return results[wanted_columns]

# Function to create the plot
def create_plot(methods_selected, x_metric, y_metric):
    """Plot one metric against another for the selected methods.

    Reads the results CSV at ``CSV_RESULT_PATH``, keeps the rows whose
    ``Method`` is in ``methods_selected`` and draws one line (with circle
    markers) per selected method.

    Args:
        methods_selected: Method names to plot. ``None`` is treated as an
            empty selection.
        x_metric: Name of the CSV column used for the x-axis.
        y_metric: Name of the CSV column used for the y-axis.

    Returns:
        Path of the PNG file the figure was saved to.

    Raises:
        gr.Error: If either axis metric has not been chosen.
    """
    import tempfile

    # An unselected dropdown arrives as None; indexing the frame with it
    # would fail with an opaque KeyError, so report it to the user instead.
    if not x_metric or not y_metric:
        raise gr.Error("Please select both an X-axis and a Y-axis metric before plotting.")

    methods_selected = methods_selected or []
    df = pd.read_csv(CSV_RESULT_PATH)
    filtered_df = df[df['Method'].isin(methods_selected)]

    # Work on explicit figure/axes objects rather than pyplot's implicit
    # "current figure", and always close the figure, even if plotting fails.
    fig, ax = plt.subplots(figsize=(10, 8))  # Larger than the default size
    try:
        for method in methods_selected:
            method_data = filtered_df[filtered_df['Method'] == method]
            ax.plot(method_data[x_metric], method_data[y_metric], label=method, marker='o')

        ax.set_xlabel(x_metric)
        ax.set_ylabel(y_metric)
        ax.set_title(f'{y_metric} vs {x_metric} for selected methods')
        if methods_selected:
            # legend() warns when there is nothing to label
            ax.legend()
        ax.grid(True)

        # Save to a unique file so overlapping requests cannot overwrite
        # each other's image (the previous fixed "plot.png" was shared).
        with tempfile.NamedTemporaryFile(prefix="plot_", suffix=".png", delete=False) as tmp:
            plot_path = tmp.name
        fig.savefig(plot_path)
    finally:
        plt.close(fig)

    return plot_path

def add_new_eval(
    human_file,
    skempi_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    benchmark_type,
    similarity_tasks,
    function_prediction_aspect,
    function_prediction_dataset,
    family_prediction_dataset,
):
    """Forward a submission from the "Submit" tab to ``run_probe``.

    The representation name passed on is ``revision_name_textbox`` unless it
    is the empty string, in which case ``model_name_textbox`` is used.

    Returns:
        Always ``None``; whatever ``run_probe`` returns is discarded.
    """
    if revision_name_textbox == '':
        representation_name = model_name_textbox
    else:
        representation_name = revision_name_textbox

    run_probe(
        benchmark_type,
        representation_name,
        human_file,
        skempi_file,
        similarity_tasks,
        function_prediction_aspect,
        function_prediction_dataset,
        family_prediction_dataset,
    )
    return None

# Top-level Gradio container; every component created inside `with block:`
# below is attached to it in creation order.
block = gr.Blocks()

with block:
    # Intro text shown above the tabs (constant presumably provided by
    # `from src.about import *` — it is not defined in this file).
    gr.Markdown(LEADERBOARD_INTRODUCTION)
    
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab 1: metric visualizer plus the results table.
        # NOTE(review): the same elem_id is reused for all three tabs below.
        with gr.TabItem("🏅 PROBE Benchmark", elem_id="probe-benchmark-tab-table", id=1):
            
            # Add the visualizer components (Dropdown, Checkbox, Button, Image)
            with gr.Row():
                # NOTE(review): the results CSV is read twice here; one read
                # would provide both the method names and the column names.
                method_names = pd.read_csv(CSV_RESULT_PATH)['Method'].unique().tolist()
                metric_names = pd.read_csv(CSV_RESULT_PATH).columns.tolist()
                metric_names.remove('Method')  # Remove Method from the metric options

                # Visualizer Controls: Smaller and underneath each other
                with gr.Column(scale=1):
                    method_selector = gr.CheckboxGroup(choices=method_names, label="Select Methods", interactive=True)
                    x_metric_selector = gr.Dropdown(choices=metric_names, label="Select X-axis Metric", interactive=True)
                    y_metric_selector = gr.Dropdown(choices=metric_names, label="Select Y-axis Metric", interactive=True)
                    plot_button = gr.Button("Plot")
                
                # Larger plot display
                with gr.Column(scale=3):
                    output_plot = gr.Image(label="Plot", height=480)  # Set larger height for the plot
                
            # Clicking "Plot" calls create_plot with the three selections and
            # shows the returned image path in output_plot.
            plot_button.click(create_plot, inputs=[method_selector, x_metric_selector, y_metric_selector], outputs=output_plot)
            
            # Now the rest of the UI elements as they were before
            # NOTE(review): no event handler is attached to this group in this
            # file, so changing the selection does not update the table below.
            checkbox_group = gr.CheckboxGroup(
                choices=TASK_INFO,
                label="Benchmark Type",
                interactive=True,
            )  # User can select the evaluation dimension
            
            # The table contents, headers and datatypes are all derived from
            # checkbox_group.value as read here, at UI-construction time.
            baseline_value = get_baseline_df()
            baseline_header = ["Method"] + checkbox_group.value
            baseline_datatype = ['markdown'] + ['number'] * len(checkbox_group.value)
            
            # Read-only results table; also the output target of "Refresh".
            data_component = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )

        # Tab 2: static "About" text.
        with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        # Tab 3: submission form whose values are passed to add_new_eval.
        with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
            with gr.Row():
                gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name",
                    )
                    # add_new_eval uses this value instead of the model name
                    # whenever it is not empty.
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name",
                    )
                    
                    benchmark_type = gr.CheckboxGroup(
                        choices=TASK_INFO,
                        label="Benchmark Type",
                        interactive=True,
                    )
                    similarity_tasks = gr.CheckboxGroup(
                        choices=similarity_tasks_options,
                        label="Select Similarity Tasks",
                        interactive=True,
                    )
                
                    function_prediction_aspect = gr.Radio(
                        choices=function_prediction_aspect_options,
                        label="Select Function Prediction Aspect",
                        interactive=True,
                    )
                
                    function_prediction_dataset = gr.Radio(
                        choices=function_prediction_dataset_options,
                        label="Select Function Prediction Dataset",
                        interactive=True,
                    )
                
                    family_prediction_dataset = gr.CheckboxGroup(
                        choices=family_prediction_dataset_options,
                        label="Select Family Prediction Dataset",
                        interactive=True,
                    )

            with gr.Column():
                # type='filepath' is requested so the handler receives paths
                # to the uploaded files.
                human_file = gr.components.File(label="Click to Upload the representation file (csv) for Human dataset", file_count="single", type='filepath')
                skempi_file = gr.components.File(label="Click to Upload the representation file (csv) for SKEMPI dataset", file_count="single", type='filepath')
    
                submit_button = gr.Button("Submit Eval")
                # NOTE(review): submission_result is created but never passed
                # as an output; the click below declares no outputs.
                submission_result = gr.Markdown()
                # The order of `inputs` must match add_new_eval's parameters.
                submit_button.click(
                    add_new_eval,
                    inputs=[
                        human_file,
                        skempi_file,
                        model_name_textbox,
                        revision_name_textbox,
                        benchmark_type,
                        similarity_tasks,
                        function_prediction_aspect,
                        function_prediction_dataset,
                        family_prediction_dataset,
                    ],
                )

    def refresh_data():
        """Re-read the results CSV via get_baseline_df and return the frame."""
        value = get_baseline_df()
        return value

    # "Refresh" replaces the table contents with a fresh read of the CSV.
    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(refresh_data, outputs=[data_component])

    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )

# Starts the app as soon as this module is executed (no __main__ guard).
block.launch()