# Spaces: Running
# (Hosting-page status text captured with the source; not part of the program.)
import re | |
import streamlit as st | |
import requests | |
import pandas as pd | |
from io import StringIO | |
import plotly.graph_objs as go | |
import plotly.express as px | |
def convert_markdown_table_to_dataframe(md_content):
    """
    Convert a pipe-delimited markdown table into a Pandas DataFrame.

    The leading and trailing pipe of every line is removed, the remaining
    text is split on ``|``, and the first parsed row (the ``---|---``
    separator line of a standard markdown table) is discarded. Markdown
    links in the ``Model`` column (``[name](url)``) are replaced by their
    link text, and the target of the first link in each cell is stored in a
    new ``URL`` column (``None`` when the cell contains no link).

    Args:
        md_content: Text of the markdown table, header row first.

    Returns:
        A DataFrame with whitespace-stripped column names, one row per table
        row, whitespace-stripped ``Model`` names and an added ``URL``
        column. Other cells are returned exactly as ``pandas.read_csv``
        parsed them, so they may still carry padding around the value.

    Raises:
        KeyError: If the table has no ``Model`` column.
    """
    # Remove the outer pipes so they do not yield empty first/last columns.
    without_leading = re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE)
    cleaned_content = re.sub(r'\|\s*$', '', without_leading, flags=re.MULTILINE)

    # The separator is interpreted as a regex; a raw string keeps the
    # backslash literal instead of relying on an invalid "\|" escape.
    df = pd.read_csv(StringIO(cleaned_content), sep=r"\|", engine='python')

    # Discard the first parsed row (the markdown separator line). Slicing by
    # position, unlike drop(0), also works when the table has no data rows.
    df = df.iloc[1:].copy()
    df.columns = df.columns.str.strip()

    if 'Model' not in df.columns:
        raise KeyError("markdown table has no 'Model' column")

    # Split "[name](url)" cells into the display name and the link target.
    link_re = re.compile(r'\[(.*?)\]\((.*?)\)')
    names = []
    urls = []
    for cell in df['Model']:
        if not isinstance(cell, str):
            # Empty cells are parsed as NaN; keep them as-is rather than
            # passing a float to the regex.
            names.append(cell)
            urls.append(None)
            continue
        match = link_re.search(cell)
        urls.append(match.group(2) if match else None)
        # The pipe split leaves padding around the cell text; strip it so
        # model names display and compare cleanly.
        names.append(link_re.sub(r'\1', cell).strip())

    df['URL'] = urls
    df['Model'] = names
    return df
def create_bar_chart(df, metric, color_map, key_suffix):
    """
    Render a horizontal bar chart of one metric in the Streamlit page.

    A heading for the metric is always written first. If the metric is not a
    column of ``df`` a short notice is written instead of a chart. Otherwise
    rows missing the model name or the metric value are dropped, the rest
    are sorted ascending by the metric, and one bar per model is drawn,
    coloured by its value using ``color_map`` as the Plotly colorscale.

    Args:
        df: DataFrame with a ``Model`` column and, normally, a ``metric`` column.
        metric: Name of the column to plot.
        color_map: Colorscale passed to the bar marker.
        key_suffix: Value appended to the Streamlit element key so that
            several charts can coexist on one page.
    """
    st.write(f"### {metric} Scores")

    metric_present = metric in df.columns
    if not metric_present:
        st.write(f"No data available for {metric}.")
        return

    # Keep only complete (model, score) pairs, then order them by score.
    scored = df[['Model', metric]].dropna()
    ranked = scored.sort_values(by=metric, ascending=True)

    bars = go.Bar(
        x=ranked[metric],
        y=ranked['Model'],
        orientation='h',
        marker={'color': ranked[metric], 'colorscale': color_map},
    )
    chart = go.Figure(data=[bars])
    chart.update_layout(margin={'l': 20, 'r': 20, 't': 20, 'b': 20})

    chart_key = f"bar_chart_{metric}_{key_suffix}"
    st.plotly_chart(chart, use_container_width=True, key=chart_key)
def create_radar_chart(df, metric_columns):
    """
    Render a radar chart comparing the ten models with the highest "Average".

    A heading is always written first. If ``df`` has no ``Average`` column,
    or selecting the top rows yields nothing, a notice is written and no
    chart is drawn. Otherwise each selected model becomes one filled polar
    trace whose axes are ``metric_columns``; the radial axis runs from 0 to
    1.2 times the largest plotted value.

    Args:
        df: DataFrame with ``Model``, ``Average`` and the metric columns.
        metric_columns: List of column names used as the radar axes.
    """
    st.write("### Radar Chart (Top 10 Models by Average Score)")

    if 'Average' not in df.columns:
        st.write("Average column not found.")
        return

    leaders = df.nlargest(10, 'Average')
    if leaders.empty:
        st.write("No models available for the radar chart.")
        return

    plotted_columns = ['Model'] + metric_columns
    radar_data = leaders[plotted_columns].set_index('Model')

    # One polar trace per model, in the order nlargest returned them.
    traces = [
        go.Scatterpolar(
            r=scores.values,
            theta=metric_columns,
            fill='toself',
            name=model_name,
        )
        for model_name, scores in radar_data.iterrows()
    ]
    fig = go.Figure(data=traces)

    # Leave 20% headroom above the largest value on the radial axis.
    axis_limit = 1.2 * radar_data.max().max()
    fig.update_layout(
        polar={'radialaxis': {'visible': True, 'range': [0, axis_limit]}},
        showlegend=True,
    )
    st.plotly_chart(fig, use_container_width=True, key="radar_chart")
def main():
    """
    Build the Streamlit leaderboard page.

    Reads a URL from a text input, downloads the markdown table behind it,
    converts it to a DataFrame, coerces every column other than ``Model``
    and ``URL`` to numbers, computes an ``Average`` column, and then renders
    the table, one bar chart per metric (including ``Average``) and a radar
    chart of the top models. Any failure while fetching or processing is
    reported on the page instead of raising.
    """
    st.set_page_config(page_title="SLM Leaderboard", layout="wide")
    st.title("π SLM Leaderboard")
    st.markdown("We record Nous and Standard benchmark results for various SLMs. Please submit a PR to this [repo](https://github.com/amazon-science/aws-research-science/tree/main/SLMleaderboard) to inlcude your model! Heavily Inspired by [YALB](https://huggingface.co/spaces/mlabonne/Yet_Another_LLM_Leaderboard) ")

    # URL of the markdown file holding the leaderboard table.
    md_url = st.text_input("This the default location of the bechmarks and can be changed",
                           "https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md")
    st.markdown("""
Copy the following links into the textbox above and refresh dashboard:
- [Nous benchmark results](https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md)
- [Standard LLM benchmarks](https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/standard.md) """)

    if not md_url:
        st.error("Please provide a valid URL to a markdown file containing the leaderboard table.")
        return

    try:
        # Without a timeout an unresponsive host would block the app forever.
        response = requests.get(md_url, timeout=10)
        response.raise_for_status()
        df = convert_markdown_table_to_dataframe(response.text)

        # Every column except 'Model' and 'URL' is treated as a metric.
        metric_columns = [col for col in df.columns if col not in ('Model', 'URL')]

        # Unparseable cells become NaN rather than aborting the page.
        for col in metric_columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        # Average only the benchmark columns: if the table already ships an
        # 'Average' column it must not be averaged into its own replacement.
        score_columns = [col for col in metric_columns if col != 'Average']
        df['Average'] = df[score_columns].mean(axis=1, skipna=True)
        if 'Average' not in metric_columns:
            metric_columns.append('Average')

        # Dropdown to select the colorscale used by the bar charts.
        color_map = st.selectbox("Select Color Map for Bar Charts", options=['Inferno', 'Viridis', 'Cividis'])

        # Sortable leaderboard table.
        st.dataframe(
            df[['Model'] + metric_columns + ['URL']],
            use_container_width=True,
            hide_index=True,
        )

        # One bar chart per metric. 'Average' is part of metric_columns, so
        # it is drawn here exactly once (no separate extra chart).
        for i, metric in enumerate(metric_columns):
            create_bar_chart(df, metric, color_map, key_suffix=i)

        # Radar chart for the top 10 models by "Average" score.
        create_radar_chart(df, metric_columns)
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user in the page.
        st.error(f"An error occurred while processing the markdown table: {e}")
# Build the page only when this file is executed as the entry script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()