__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']

import gradio as gr
import pandas as pd
import re
import os
import json
import yaml
import matplotlib.pyplot as plt

from src.about import *
from src.bin.PROBE import run_probe

global data_component, filter_component
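# NOTE: data_component and filter_component are assigned inside the gr.Blocks
# context below; at module scope the "global" statement above is a no-op and
# serves only to signal that these names are shared across callbacks.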

def get_baseline_df():
    df = pd.read_csv(CSV_RESULT_PATH)
    present_columns = ["Method"] + checkbox_group.value
    df = df[present_columns]
    return df
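
# NOTE: get_baseline_df reads checkbox_group.value, and checkbox_group is only
# created inside the gr.Blocks context below, so this function must not be
# called before the UI has been built.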

# Plot the selected methods against the chosen x/y metrics.
def create_plot(methods_selected, x_metric, y_metric):
    df = pd.read_csv(CSV_RESULT_PATH)
    filtered_df = df[df['Method'].isin(methods_selected)]

    plt.figure(figsize=(10, 8))  # larger figure so labels and legend stay readable
    for method in methods_selected:
        method_data = filtered_df[filtered_df['Method'] == method]
        plt.plot(method_data[x_metric], method_data[y_metric], label=method, marker='o')

    plt.xlabel(x_metric)
    plt.ylabel(y_metric)
    plt.title(f'{y_metric} vs {x_metric} for selected methods')
    plt.legend()
    plt.grid(True)

    # Save the figure to disk so it can be displayed in the gr.Image component
    plot_path = "plot.png"
    plt.savefig(plot_path)
    plt.close()
    return plot_path
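
# Minimal usage sketch for the plotting callback (the method and metric names
# below are hypothetical; real choices come from the columns of CSV_RESULT_PATH):
#   create_plot(["method_a", "method_b"], "metric_x", "metric_y")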

def add_new_eval(
    human_file,
    skempi_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    benchmark_type,
    similarity_tasks,
    function_prediction_aspect,
    function_prediction_dataset,
    family_prediction_dataset,
):
    # Use the revision name when one is provided, otherwise fall back to the model name
    representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
    results = run_probe(benchmark_type, representation_name, human_file, skempi_file,
                        similarity_tasks, function_prediction_aspect,
                        function_prediction_dataset, family_prediction_dataset)
    return None
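
# NOTE: add_new_eval returns None and the click handler below declares no
# outputs, so run_probe's results are not currently surfaced in the UI.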

block = gr.Blocks()

with block:
    gr.Markdown(LEADERBOARD_INTRODUCTION)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Leaderboard tab: results table plus an interactive visualizer
        with gr.TabItem("PROBE Benchmark", elem_id="probe-benchmark-tab-table", id=1):
            # Visualizer components (checkbox group, dropdowns, button, image)
            with gr.Row():
                method_names = pd.read_csv(CSV_RESULT_PATH)['Method'].unique().tolist()
                metric_names = pd.read_csv(CSV_RESULT_PATH).columns.tolist()
                metric_names.remove('Method')  # "Method" is an identifier, not a metric

                # Visualizer controls, stacked in a narrow column
                with gr.Column(scale=1):
                    method_selector = gr.CheckboxGroup(choices=method_names, label="Select Methods", interactive=True)
                    x_metric_selector = gr.Dropdown(choices=metric_names, label="Select X-axis Metric", interactive=True)
                    y_metric_selector = gr.Dropdown(choices=metric_names, label="Select Y-axis Metric", interactive=True)
                    plot_button = gr.Button("Plot")

                # Larger column for the plot display
                with gr.Column(scale=3):
                    output_plot = gr.Image(label="Plot", height=480)

            plot_button.click(create_plot, inputs=[method_selector, x_metric_selector, y_metric_selector], outputs=output_plot)

            # Leaderboard table and the checkbox group that picks its columns
            checkbox_group = gr.CheckboxGroup(
                choices=TASK_INFO,
                label="Benchmark Type",
                interactive=True,
            )  # the user selects which evaluation dimensions to display

            baseline_value = get_baseline_df()
            baseline_header = ["Method"] + checkbox_group.value
            baseline_datatype = ['markdown'] + ['number'] * len(checkbox_group.value)

            data_component = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )
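
            # The Dataframe's headers and datatypes must stay aligned with the
            # columns returned by get_baseline_df: "Method" first, then one
            # numeric column per selected benchmark dimension.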
        # About tab
        with gr.TabItem("About", elem_id="probe-benchmark-tab-table", id=2):
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        # Submission tab
        with gr.TabItem("Submit here!", elem_id="probe-benchmark-tab-table", id=3):
            with gr.Row():
                gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name",
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name",
                    )
                    benchmark_type = gr.CheckboxGroup(
                        choices=TASK_INFO,
                        label="Benchmark Type",
                        interactive=True,
                    )
                    similarity_tasks = gr.CheckboxGroup(
                        choices=similarity_tasks_options,
                        label="Select Similarity Tasks",
                        interactive=True,
                    )
                    function_prediction_aspect = gr.Radio(
                        choices=function_prediction_aspect_options,
                        label="Select Function Prediction Aspect",
                        interactive=True,
                    )
                    function_prediction_dataset = gr.Radio(
                        choices=function_prediction_dataset_options,
                        label="Select Function Prediction Dataset",
                        interactive=True,
                    )
                    family_prediction_dataset = gr.CheckboxGroup(
                        choices=family_prediction_dataset_options,
                        label="Select Family Prediction Dataset",
                        interactive=True,
                    )

                with gr.Column():
                    human_file = gr.components.File(label="Click to Upload the representation file (csv) for Human dataset", file_count="single", type='filepath')
                    skempi_file = gr.components.File(label="Click to Upload the representation file (csv) for SKEMPI dataset", file_count="single", type='filepath')

                    submit_button = gr.Button("Submit Eval")
                    submission_result = gr.Markdown()
                    submit_button.click(
                        add_new_eval,
                        inputs=[
                            human_file,
                            skempi_file,
                            model_name_textbox,
                            revision_name_textbox,
                            benchmark_type,
                            similarity_tasks,
                            function_prediction_aspect,
                            function_prediction_dataset,
                            family_prediction_dataset,
                        ],
                    )

    def refresh_data():
        value = get_baseline_df()
        return value

    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(refresh_data, outputs=[data_component])
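
    # The Refresh button re-reads CSV_RESULT_PATH via get_baseline_df, so newly
    # added results appear in the table without restarting the app.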

    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )

block.launch()