# PROBE
#
# Sleeping
#
# File size: 5,782 Bytes

# Public names exported by `from <this module> import *`.
# NOTE(review): of these, only 'block' is assigned in the visible part of this
# file. 'make_clickable_model', 'make_clickable_user' and 'get_submissions'
# would have to come from the `src.about` star import; if they do not exist,
# a star import of this module raises AttributeError -- confirm or prune.
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']

import gradio as gr
import pandas as pd
import re
import pandas as pd
import os
import json
import yaml

from src.about import *
from src.bin.PROBE import run_probe

# NOTE(review): a `global` statement at module scope has no effect, since names
# assigned here are already module globals. `data_component` is assigned later
# in the UI block; `filter_component` is never assigned in the visible code.
global data_component, filter_component


def get_baseline_df():
    """Load the results CSV and keep only the columns shown in the table.

    Reads the file at ``CSV_RESULT_PATH`` and returns a DataFrame restricted
    to the "Method" column followed by the entries of ``checkbox_group.value``.

    ``checkbox_group`` is a module-level component created in the UI block, so
    this function can only be called after that component exists.
    NOTE(review): ``.value`` is read straight off the component object;
    presumably this is its configured value rather than a live user
    selection -- confirm.
    """
    results = pd.read_csv(CSV_RESULT_PATH)
    return results[["Method"] + checkbox_group.value]


def add_new_eval(
    human_file,
    skempi_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    benchmark_type,
    similarity_tasks=None,
    function_prediction_aspect=None,
    function_prediction_dataset=None,
    family_prediction_dataset=None,
):
    """Run the PROBE benchmark for an uploaded set of representation files.

    This is the callback of the "Submit Eval" button. Gradio calls it with one
    positional argument per component listed in the button's ``inputs``, so
    the signature has to accept every one of those components; the last four
    parameters exist for that reason and default to ``None`` so that existing
    five-argument callers keep working.

    Args:
        human_file: Value of the "Human dataset" file upload component.
        skempi_file: Value of the "SKEMPI dataset" file upload component.
        model_name_textbox: Model name entered by the user.
        revision_name_textbox: Revision name; when non-empty it is used as the
            representation name instead of ``model_name_textbox``.
        benchmark_type: Selection from the "Benchmark Type" checkbox group.
        similarity_tasks: Selection from the similarity-task checkbox group.
        function_prediction_aspect: Selected function-prediction aspect.
        function_prediction_dataset: Selected function-prediction dataset.
        family_prediction_dataset: Selected family-prediction dataset(s).

    Returns:
        None. The button is wired without output components.
    """
    # Prefer the revision name; fall back to the model name when the revision
    # is empty (or missing altogether).
    representation_name = revision_name_textbox or model_name_textbox

    # NOTE(review): the task-specific selections (similarity_tasks,
    # function_prediction_aspect, function_prediction_dataset,
    # family_prediction_dataset) are accepted but not forwarded, because the
    # signature of run_probe is not visible here -- confirm whether it should
    # receive them.
    run_probe(benchmark_type, representation_name, human_file, skempi_file)

    return None

# Root Gradio container; every component created inside `with block:` below
# is attached to it, in creation order.
block = gr.Blocks()

with block:
    # Introductory text shown above the tabs.
    gr.Markdown(
        LEADERBOARD_INTRODUCTION
    )
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab 1: leaderboard (column selector plus results table).
        # NOTE(review): all three tabs below share the same elem_id
        # ("probe-benchmark-tab-table"); confirm whether that is intended.
        with gr.TabItem("🏅 PROBE Benchmark", elem_id="probe-benchmark-tab-table", id=1):
            # Column selector for the results table.
            # NOTE(review): no event handler in this file reacts to changes of
            # this checkbox group; its `.value` is only read by
            # get_baseline_df() and while building the table below.
            checkbox_group = gr.CheckboxGroup(
                choices=TASK_INFO,
                label="Benchmark Type",
                interactive=True,
            ) # user can select the evaluation dimension

            # Initial table contents, headers and per-column datatypes, all
            # derived from "Method" plus the checkbox group's value.
            baseline_value = get_baseline_df()
            baseline_header = ["Method"] + checkbox_group.value
            baseline_datatype = ['markdown'] + ['number'] * len(checkbox_group.value)

            # Read-only results table; also the output target of the
            # "Refresh" button further down.
            data_component = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
                )

        # Tab 2: static "About" text.
        with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        # Tab 3: submission form.
        with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
            with gr.Row():
                gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name",
                        )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name",
                    )
                    # Checkbox selection of the benchmark type(s) used to
                    # evaluate the representations (choices come from TASK_INFO).
                    benchmark_type = gr.CheckboxGroup(
                        choices=TASK_INFO,
                        label="Benchmark Type",
                        interactive=True,
                    )
                    # Checkbox selection of similarity tasks.
                    similarity_tasks = gr.CheckboxGroup(
                        choices=similarity_tasks_options,
                        label="Select Similarity Tasks",
                        interactive=True,
                    )

                    # Radio selection of the function prediction aspect.
                    function_prediction_aspect = gr.Radio(
                        choices=function_prediction_aspect_options,
                        label="Select Function Prediction Aspect",
                        interactive=True,
                    )

                    # Radio selection of the function prediction dataset.
                    function_prediction_dataset = gr.Radio(
                        choices=function_prediction_dataset_options,
                        label="Select Function Prediction Dataset",
                        interactive=True,
                    )

                    # Checkbox selection of the family prediction dataset(s).
                    family_prediction_dataset = gr.CheckboxGroup(
                        choices=family_prediction_dataset_options,
                        label="Select Family Prediction Dataset",
                        interactive=True,
                    )

            # File uploads and the submit button. This column is created
            # outside the gr.Row above (it is a direct child of the tab).
            with gr.Column():
                human_file = gr.components.File(label="Click to Upload the representation file (csv) for Human dataset", file_count="single", type='filepath')
                skempi_file = gr.components.File(label="Click to Upload the representation file (csv) for SKEMPI dataset", file_count="single", type='filepath')

                submit_button = gr.Button("Submit Eval")
                # NOTE(review): submission_result is created but not used as
                # an output of any event handler in this file.
                submission_result = gr.Markdown()
                # Gradio passes one positional argument per listed input, in
                # this order, so add_new_eval must accept all of them.
                submit_button.click(
                    add_new_eval,
                    inputs = [
                        human_file,
                        skempi_file,
                        model_name_textbox,
                        revision_name_textbox,
                        benchmark_type,
                        similarity_tasks,
                        function_prediction_aspect,
                        function_prediction_dataset,
                        family_prediction_dataset,
                    ],
                )

    def refresh_data():
        """Return the table contents produced by get_baseline_df()."""
        value = get_baseline_df()

        return value

    with gr.Row():
        # "Refresh" replaces the results table with the output of refresh_data.
        data_run = gr.Button("Refresh")
        data_run.click(
            refresh_data, outputs=[data_component]
        )

    # Collapsed-by-default citation box with a copy button.
    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )

# Runs at module level, so the app is launched whenever this module is
# executed or imported.
block.launch()