# SLM-Leaderboard / app.py
# w601sxs's picture
# reverted
# 992f74a
import re
import streamlit as st
import requests
import pandas as pd
from io import StringIO
import plotly.graph_objs as go
import plotly.express as px
def convert_markdown_table_to_dataframe(md_content):
    """
    Convert a markdown table into a Pandas DataFrame.

    The outer pipes of every line are removed, the remainder is parsed as
    pipe-separated text, and the row directly below the header (the
    ``---|---`` separator row of a markdown table) is discarded. Markdown
    links in the ``Model`` column are split into link text and link target.

    Args:
        md_content: Markdown source whose first line is the table header.

    Returns:
        A DataFrame with stripped column names, a ``Model`` column holding
        the cell text with every ``[text](target)`` link replaced by its
        text (surrounding whitespace removed), and an added ``URL`` column
        holding the target of the first link in the cell, or ``None`` when
        the cell contains no link. All other cells are left as parsed.

    Raises:
        KeyError: If the table has no ``Model`` column.
    """
    # Remove the leading and trailing pipe of each line so the rows become
    # plain pipe-delimited text.
    without_leading_pipe = re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE)
    cleaned_content = re.sub(r'\|\s*$', '', without_leading_pipe, flags=re.MULTILINE)

    # Raw string: "\|" in a normal literal is an invalid escape sequence.
    # A multi-character separator is interpreted as a regex by pandas.
    df = pd.read_csv(StringIO(cleaned_content), sep=r"\|", engine='python')

    # The first data row is the markdown separator row. Skipping it by
    # position (instead of by label 0) keeps a header-only table from
    # raising KeyError; the remaining rows keep their original index labels.
    df = df.iloc[1:].copy()
    df.columns = df.columns.str.strip()  # Header cells are padded with spaces

    model_link_pattern = re.compile(r'\[(.*?)\]\((.*?)\)')

    def split_model_cell(cell):
        """Return ``(display_name, url)`` for a single ``Model`` cell."""
        if not isinstance(cell, str):
            # Missing cells are not strings; pass them through untouched
            # rather than letting the regex raise TypeError.
            return cell, None
        match = model_link_pattern.search(cell)
        url = match.group(2) if match else None
        # Cells are padded with spaces around the pipes; strip the padding
        # so the names display and compare cleanly.
        return model_link_pattern.sub(r'\1', cell).strip(), url

    parsed_cells = [split_model_cell(cell) for cell in df['Model']]
    df['URL'] = [url for _, url in parsed_cells]
    df['Model'] = [name for name, _ in parsed_cells]
    return df
def create_bar_chart(df, metric, color_map, key_suffix):
    """
    Render a horizontal bar chart of one metric, one bar per model.

    A heading for the metric is always written. When ``metric`` is not a
    column of ``df`` a short notice is shown in place of the chart.
    Otherwise rows with missing values are left out, the bars are ordered
    by ascending score, ``color_map`` is used as the bar colorscale, and
    ``key_suffix`` becomes part of the Streamlit element key.
    """
    st.write(f"### {metric} Scores")

    if metric in df.columns:
        scores = df[['Model', metric]].dropna()
        scores = scores.sort_values(by=metric, ascending=True)

        bars = go.Bar(
            x=scores[metric],
            y=scores['Model'],
            orientation='h',
            # Bars are coloured by their own value along the chosen scale.
            marker={'color': scores[metric], 'colorscale': color_map},
        )
        chart = go.Figure(bars)
        chart.update_layout(margin={'l': 20, 'r': 20, 't': 20, 'b': 20})
        st.plotly_chart(
            chart,
            use_container_width=True,
            key=f"bar_chart_{metric}_{key_suffix}",
        )
    else:
        st.write(f"No data available for {metric}.")
def create_radar_chart(df, metric_columns):
    """
    Draw a radar chart comparing the ten models with the highest "Average".

    One filled polar trace is added per model, with ``metric_columns`` as
    the angular axes. A notice is written instead of the chart when ``df``
    has no "Average" column or when no rows are selected.
    """
    st.write("### Radar Chart (Top 10 Models by Average Score)")

    if 'Average' not in df.columns:
        st.write("Average column not found.")
        return

    leaders = df.nlargest(10, 'Average')
    if leaders.empty:
        st.write("No models available for the radar chart.")
        return

    radar_data = leaders[['Model'] + metric_columns].set_index('Model')

    # Radial axis runs from 0 to 1.2 times the largest plotted value.
    axis_limit = 1.2 * radar_data.max().max()

    chart = go.Figure()
    for model_name, scores in radar_data.iterrows():
        trace = go.Scatterpolar(
            r=scores.values,
            theta=metric_columns,
            fill='toself',
            name=model_name,
        )
        chart.add_trace(trace)

    chart.update_layout(
        polar={'radialaxis': {'visible': True, 'range': [0, axis_limit]}},
        showlegend=True,
    )
    st.plotly_chart(chart, use_container_width=True, key="radar_chart")
def main():
    """
    Streamlit entry point for the SLM leaderboard page.

    Reads a URL from a text input, downloads the markdown file behind it,
    parses the table with ``convert_markdown_table_to_dataframe``, converts
    every column other than "Model" and "URL" to numbers, computes a
    per-model "Average", and renders a table, one bar chart per metric and
    a radar chart. Download and processing errors are shown with
    ``st.error`` instead of being raised.
    """
    st.set_page_config(page_title="SLM Leaderboard", layout="wide")
    # Trophy emoji written as an escape so it cannot be mis-decoded again.
    st.title("\U0001F3C6 SLM Leaderboard")
    st.markdown("We record Nous and Standard benchmark results for various SLMs. Please submit a PR to this [repo](https://github.com/amazon-science/aws-research-science/tree/main/SLMleaderboard) to include your model! Heavily Inspired by [YALB](https://huggingface.co/spaces/mlabonne/Yet_Another_LLM_Leaderboard) ")

    # URL to your markdown file
    md_url = st.text_input("This is the default location of the benchmarks and can be changed",
                           "https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md")
    st.markdown("""
Copy the following links into the textbox above and refresh dashboard:
- [Nous benchmark results](https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md)
- [Standard LLM benchmarks](https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/standard.md) """)

    # Pasted URLs often carry stray whitespace; a blank value is treated
    # the same as no value.
    md_url = (md_url or "").strip()
    if not md_url:
        st.error("Please provide a valid URL to a markdown file containing the leaderboard table.")
        return

    try:
        # NOTE(review): the URL is user-supplied and fetched by the server
        # running this app; consider restricting the allowed hosts.
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(md_url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        st.error(f"Could not download the markdown file: {e}")
        return

    try:
        df = convert_markdown_table_to_dataframe(response.text)

        # Every column except the identifiers is a metric. A pre-existing
        # "Average" column is left out so it is not folded into the mean
        # that is recomputed below.
        base_metrics = [col for col in df.columns if col not in ('Model', 'URL', 'Average')]

        # Non-numeric cells become NaN instead of raising.
        for col in base_metrics:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        # Per-model mean over the available (non-NaN) metrics.
        df['Average'] = df[base_metrics].mean(axis=1, skipna=True)
        metric_columns = base_metrics + ['Average']

        # Dropdown to select color map
        color_map = st.selectbox("Select Color Map for Bar Charts", options=['Inferno', 'Viridis', 'Cividis'])

        # Sortable leaderboard table
        st.dataframe(
            df[['Model'] + metric_columns + ['URL']],
            use_container_width=True,
            hide_index=True,
        )

        # One bar chart per metric; "Average" is the last entry of
        # metric_columns, so it is drawn exactly once here.
        for i, metric in enumerate(metric_columns):
            create_bar_chart(df, metric, color_map, key_suffix=i)

        # Radar chart for the top 10 models by "Average" score
        create_radar_chart(df, metric_columns)
    except Exception as e:
        # Top-level UI boundary: show the problem on the page rather than
        # a stack trace.
        st.error(f"An error occurred while processing the markdown table: {e}")
# Run the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()