Spaces:

w601sxs
/

SLM-Leaderboard

Running

App Files Files Community

SLM-Leaderboard / app.py

w601sxs

reverted

992f74a 2 months ago

raw

history blame contribute delete

5.61 kB

	import re
	import streamlit as st
	import requests
	import pandas as pd
	from io import StringIO
	import plotly.graph_objs as go
	import plotly.express as px


	def convert_markdown_table_to_dataframe(md_content):
	    """
	    Convert a markdown table into a pandas DataFrame.

	    The outer pipes of every table line are stripped, the remaining text is
	    parsed as pipe-separated values, and the alignment row that follows the
	    header is discarded. Markdown links in the ``Model`` column are split
	    into their link text (kept in ``Model``) and their link target (stored
	    in a new ``URL`` column).

	    Args:
	        md_content: Text of a markdown table that has a ``Model`` column.

	    Returns:
	        DataFrame with whitespace-stripped column names plus a ``URL``
	        column, which is empty for rows whose model cell holds no link.
	        All other cells are returned as parsed text; they are not
	        converted to numbers here.

	    Raises:
	        KeyError: If the table has no ``Model`` column.
	    """
	    # Remove the leading and trailing pipe of each line so the parser does
	    # not produce empty first/last columns.
	    without_leading = re.sub(r'^[ \t]*\|[ \t]*', '', md_content, flags=re.MULTILINE)
	    cleaned_content = re.sub(r'[ \t]*\|[ \t\r]*$', '', without_leading, flags=re.MULTILINE)

	    df = pd.read_csv(StringIO(cleaned_content), sep=r"\|", engine='python')

	    # The first parsed row is the markdown alignment row (---|---:). Dropping
	    # by position slice keeps this safe for a table that has no rows at all.
	    df = df.drop(index=df.index[:1])
	    df.columns = df.columns.str.strip()

	    # [link text](link target), both parts matched non-greedily.
	    link_pattern = re.compile(r'\[(.*?)\]\((.*?)\)')

	    def _link_target(cell):
	        # Missing cells arrive as NaN (not str) and carry no URL.
	        match = link_pattern.search(cell) if isinstance(cell, str) else None
	        return match.group(2) if match else None

	    def _link_text(cell):
	        if not isinstance(cell, str):
	            return cell
	        # Replace every link by its visible text and trim the cell padding.
	        return link_pattern.sub(r'\1', cell).strip()

	    # URL must be derived before Model is overwritten with the plain names.
	    df['URL'] = df['Model'].apply(_link_target)
	    df['Model'] = df['Model'].apply(_link_text)
	    return df


	def create_bar_chart(df, metric, color_map, key_suffix):
	    """
	    Render a horizontal bar chart of one metric, one bar per model.

	    A heading for the metric is always written. When ``metric`` is not a
	    column of ``df`` a short notice is written instead of a chart.

	    Args:
	        df: Leaderboard table containing a ``Model`` column.
	        metric: Name of the column to plot.
	        color_map: Plotly colorscale name applied to the bars.
	        key_suffix: Value appended to the Streamlit element key so that
	            several charts can coexist on the page.
	    """
	    st.write(f"### {metric} Scores")

	    if metric in df.columns:
	        # Rows without a value for this metric are left out; ascending
	        # order is what the horizontal bars are drawn from.
	        ranked = df[['Model', metric]].dropna()
	        ranked = ranked.sort_values(by=metric, ascending=True)
	        scores = ranked[metric]

	        bars = go.Bar(
	            x=scores,
	            y=ranked['Model'],
	            orientation='h',
	            marker={'color': scores, 'colorscale': color_map},
	        )
	        chart = go.Figure(bars)
	        chart.update_layout(margin={'l': 20, 'r': 20, 't': 20, 'b': 20})
	        st.plotly_chart(chart, use_container_width=True, key=f"bar_chart_{metric}_{key_suffix}")
	    else:
	        st.write(f"No data available for {metric}.")


	def create_radar_chart(df, metric_columns):
	    """
	    Render a radar chart comparing the ten rows with the largest "Average".

	    Each selected model becomes one filled polar trace whose axes are the
	    entries of ``metric_columns``. A notice is written instead of a chart
	    when ``df`` has no ``Average`` column or no rows are selected.

	    Args:
	        df: Leaderboard table with ``Model`` and ``Average`` columns.
	        metric_columns: Column names used as the radar axes.
	    """
	    st.write("### Radar Chart (Top 10 Models by Average Score)")

	    if 'Average' not in df.columns:
	        st.write("Average column not found.")
	        return

	    leaders = df.nlargest(10, 'Average')
	    if leaders.empty:
	        st.write("No models available for the radar chart.")
	        return

	    scores_by_model = leaders[['Model'] + metric_columns].set_index('Model')

	    # Radial axis reaches 20% past the largest plotted value.
	    axis_limit = 1.2 * scores_by_model.max().max()

	    traces = [
	        go.Scatterpolar(
	            r=scores.values,
	            theta=metric_columns,
	            fill='toself',
	            name=model,
	        )
	        for model, scores in scores_by_model.iterrows()
	    ]

	    chart = go.Figure(data=traces)
	    chart.update_layout(
	        polar={'radialaxis': {'visible': True, 'range': [0, axis_limit]}},
	        showlegend=True,
	    )

	    st.plotly_chart(chart, use_container_width=True, key="radar_chart")


	def main():
	    """
	    Render the SLM Leaderboard Streamlit page.

	    Reads the markdown table URL from a text input, downloads and parses
	    the table, then shows the leaderboard table, one bar chart per metric
	    (including the computed "Average") and a radar chart. Download and
	    processing failures are reported with ``st.error`` rather than raised.
	    """
	    st.set_page_config(page_title="SLM Leaderboard", layout="wide")
	    st.title("🏆 SLM Leaderboard")
	    st.markdown("We record Nous and Standard benchmark results for various SLMs. Please submit a PR to this [repo](https://github.com/amazon-science/aws-research-science/tree/main/SLMleaderboard) to inlcude your model! Heavily Inspired by [YALB](https://huggingface.co/spaces/mlabonne/Yet_Another_LLM_Leaderboard) ")

	    # URL of the markdown file holding the leaderboard table.
	    md_url = st.text_input(
	        "This the default location of the bechmarks and can be changed",
	        "https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md",
	    )

	    # Built from explicit newlines so the rendered markdown does not depend
	    # on how this source file is indented.
	    st.markdown(
	        "\n"
	        "Copy the following links into the textbox above and refresh dashboard:\n"
	        "\n"
	        "- [Nous benchmark results](https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md)\n"
	        "- [Standard LLM benchmarks](https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/standard.md) "
	    )

	    if not md_url:
	        st.error("Please provide a valid URL to a markdown file containing the leaderboard table.")
	        return

	    try:
	        # A timeout keeps an unresponsive host from hanging the page.
	        response = requests.get(md_url, timeout=30)
	        response.raise_for_status()
	    except requests.RequestException as e:
	        st.error(f"An error occurred while downloading the markdown table: {e}")
	        return

	    try:
	        df = convert_markdown_table_to_dataframe(response.text)

	        # Every column except 'Model' and 'URL' is a metric. An 'Average'
	        # column supplied by the table is left out here so that it is not
	        # averaged into itself; it is recomputed below.
	        score_columns = [col for col in df.columns if col not in ('Model', 'URL', 'Average')]

	        # Non-numeric cells become NaN instead of raising.
	        for col in score_columns:
	            df[col] = pd.to_numeric(df[col], errors='coerce')

	        df['Average'] = df[score_columns].mean(axis=1, skipna=True)
	        metric_columns = score_columns + ['Average']

	        # Dropdown to select the color map used by the bar charts.
	        color_map = st.selectbox("Select Color Map for Bar Charts", options=['Inferno', 'Viridis', 'Cividis'])

	        # Sortable leaderboard table.
	        st.dataframe(
	            df[['Model'] + metric_columns + ['URL']],
	            use_container_width=True,
	            hide_index=True,
	        )

	        # One bar chart per metric, then a single chart for "Average".
	        for i, metric in enumerate(score_columns):
	            create_bar_chart(df, metric, color_map, key_suffix=i)
	        create_bar_chart(df, 'Average', color_map, key_suffix="average")

	        # Radar chart for the top 10 models by "Average" score.
	        create_radar_chart(df, metric_columns)

	    except Exception as e:
	        # Page-level boundary: surface any parsing/plotting failure in the
	        # UI instead of crashing the app.
	        st.error(f"An error occurred while processing the markdown table: {e}")


	# Build the page only when this file is executed as a script, not when it
	# is imported.
	if __name__ == "__main__":
	    main()