# PROBE / app.py
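# Gradio app for the PROBE benchmark leaderboard: it renders the baseline results table,
# an About page, and a submission tab where users upload representation CSV files for the
# Human and SKEMPI datasets to be evaluated.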
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
import gradio as gr
import pandas as pd
import re
import os
import json
import yaml  # needed by update_yaml() below

from src.about import *

global data_component, filter_component
def get_baseline_df():
    # Load the stored benchmark results and keep only the "Method" column plus the
    # metric columns currently selected in the checkbox group defined in the UI below.
    df = pd.read_csv(CSV_RESULT_PATH)
    present_columns = ["Method"] + checkbox_group.value
    df = df[present_columns]
    return df
def update_yaml(representation_name, benchmark_type, human_file_path, skempi_file_path):
    # Write the submitted settings into the PROBE run configuration.
    with open("./src/bin/probe_config.yaml", 'r') as file:
        yaml_data = yaml.safe_load(file)

    yaml_data['representation_name'] = representation_name
    yaml_data['benchmark'] = benchmark_type
    yaml_data['representation_file_human'] = human_file_path
    yaml_data['representation_file_affinity'] = skempi_file_path

    with open("./src/bin/probe_config.yaml", "w") as file:
        yaml.dump(yaml_data, file)

    return None
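
# For reference, update_yaml() touches these keys in probe_config.yaml (illustrative values;
# other keys already present in the file are preserved by the read-modify-write above):
#
#   representation_name: my_model
#   benchmark: [similarity, family, function, affinity]
#   representation_file_human: /path/to/human_representations.csv
#   representation_file_affinity: /path/to/skempi_representations.csv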
def add_new_eval(
    human_file,
    skempi_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    benchmark_type: str,
):
    # Prefer the revision name when one is given; otherwise fall back to the model name.
    representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
    update_yaml(representation_name, benchmark_type, human_file, skempi_file)

    # Save human and skempi files under ./src/data/representation_vectors using pandas
    if human_file is not None:
        human_df = pd.read_csv(human_file)
        human_df.to_csv(f"./src/data/representation_vectors/{representation_name}_human.csv", index=False)

    return None
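
# Note: only the Human representation file is persisted above, although the comment in
# add_new_eval mentions saving the SKEMPI file as well. A parallel save (hypothetical,
# not part of the original code) could look like:
#
#     if skempi_file is not None:
#         skempi_df = pd.read_csv(skempi_file)
#         skempi_df.to_csv(f"./src/data/representation_vectors/{representation_name}_skempi.csv", index=False)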
block = gr.Blocks()
with block:
    gr.Markdown(
        LEADERBOARD_INTRODUCTION
    )
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Leaderboard table
        with gr.TabItem("🏅 PROBE Benchmark", elem_id="probe-benchmark-tab-table", id=1):
            # Column selection: the user can pick which evaluation dimensions to display.
            checkbox_group = gr.CheckboxGroup(
                choices=TASK_INFO,
                label="Benchmark Type",
                interactive=True,
            )

            baseline_value = get_baseline_df()
            baseline_header = ["Method"] + checkbox_group.value
            baseline_datatype = ['markdown'] + ['number'] * len(checkbox_group.value)

            data_component = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )
        # About page
        with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
        # Submission tab
        with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
            with gr.Row():
                gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name",
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name",
                    )
                    # Benchmark types (similarity, family, function, affinity) to evaluate the representations on.
                    benchmark_type = gr.CheckboxGroup(
                        choices=TASK_INFO,
                        label="Benchmark Type",
                        interactive=True,
                    )

                with gr.Column():
                    human_file = gr.components.File(label="Click to Upload the representation file (csv) for Human dataset", file_count="single", type='filepath')
                    skempi_file = gr.components.File(label="Click to Upload the representation file (csv) for SKEMPI dataset", file_count="single", type='filepath')

            submit_button = gr.Button("Submit Eval")
            submission_result = gr.Markdown()
            submit_button.click(
                add_new_eval,
                inputs=[
                    human_file,
                    skempi_file,
                    model_name_textbox,
                    revision_name_textbox,
                    benchmark_type,
                ],
            )
    def refresh_data():
        value = get_baseline_df()
        return value

    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(
            refresh_data, outputs=[data_component]
        )
    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )
block.launch()
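
# Running `python app.py` starts the Gradio server. This assumes the accompanying src/
# package (src/about.py with the constants imported above, src/bin/probe_config.yaml, and
# src/data/representation_vectors/) is available next to this file.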