Spaces:
Running
Running
added app
Browse files- app.py +70 -8
- requirements.txt +5 -0
app.py
CHANGED
@@ -4,6 +4,7 @@ import requests
|
|
4 |
import pandas as pd
|
5 |
from io import StringIO
|
6 |
import plotly.graph_objs as go
|
|
|
7 |
|
8 |
|
9 |
def convert_markdown_table_to_dataframe(md_content):
|
@@ -23,7 +24,7 @@ def convert_markdown_table_to_dataframe(md_content):
|
|
23 |
return df
|
24 |
|
25 |
|
26 |
-
def create_bar_chart(df, metric):
|
27 |
"""
|
28 |
Creates and displays a bar chart for a given metric.
|
29 |
"""
|
@@ -37,19 +38,67 @@ def create_bar_chart(df, metric):
|
|
37 |
x=sorted_df[metric],
|
38 |
y=sorted_df['Model'],
|
39 |
orientation='h',
|
40 |
-
marker=dict(color=sorted_df[metric], colorscale=
|
41 |
))
|
42 |
fig.update_layout(margin=dict(l=20, r=20, t=20, b=20))
|
43 |
-
st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
|
46 |
def main():
|
47 |
st.set_page_config(page_title="LLM Leaderboard", layout="wide")
|
48 |
-
st.title("🏆
|
|
|
49 |
|
50 |
# URL to your markdown file
|
51 |
-
md_url = st.text_input("
|
|
|
52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
if not md_url:
|
54 |
st.error("Please provide a valid URL to a markdown file containing the leaderboard table.")
|
55 |
return
|
@@ -68,6 +117,14 @@ def main():
|
|
68 |
for col in metric_columns:
|
69 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
# Sortable leaderboard table
|
72 |
st.dataframe(
|
73 |
df[['Model'] + metric_columns + ['URL']],
|
@@ -76,8 +133,14 @@ def main():
|
|
76 |
)
|
77 |
|
78 |
# Bar charts for each metric
|
79 |
-
for metric in metric_columns:
|
80 |
-
create_bar_chart(df, metric)
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
except Exception as e:
|
83 |
st.error(f"An error occurred while processing the markdown table: {e}")
|
@@ -85,4 +148,3 @@ def main():
|
|
85 |
|
86 |
if __name__ == "__main__":
|
87 |
main()
|
88 |
-
|
|
|
4 |
import pandas as pd
|
5 |
from io import StringIO
|
6 |
import plotly.graph_objs as go
|
7 |
+
import plotly.express as px
|
8 |
|
9 |
|
10 |
def convert_markdown_table_to_dataframe(md_content):
|
|
|
24 |
return df
|
25 |
|
26 |
|
27 |
+
def create_bar_chart(df, metric, color_map, key_suffix):
|
28 |
"""
|
29 |
Creates and displays a bar chart for a given metric.
|
30 |
"""
|
|
|
38 |
x=sorted_df[metric],
|
39 |
y=sorted_df['Model'],
|
40 |
orientation='h',
|
41 |
+
marker=dict(color=sorted_df[metric], colorscale=color_map)
|
42 |
))
|
43 |
fig.update_layout(margin=dict(l=20, r=20, t=20, b=20))
|
44 |
+
st.plotly_chart(fig, use_container_width=True, key=f"bar_chart_{metric}_{key_suffix}")
|
45 |
+
|
46 |
+
|
47 |
+
def create_radar_chart(df, metric_columns, top_n=10):
    """
    Create a radar chart for the top models by "Average" score.

    Args:
        df: DataFrame holding a 'Model' column, every column named in
            ``metric_columns`` and an 'Average' column used for ranking.
        metric_columns: Column names plotted as the radar axes.
        top_n: How many of the highest-'Average' rows to plot (default 10,
            which reproduces the original fixed behaviour).

    Returns:
        None. The heading, any notice and the chart are emitted via Streamlit.
    """
    st.write(f"### Radar Chart (Top {top_n} Models by Average Score)")

    if 'Average' not in df.columns:
        st.write("Average column not found.")
        return

    # Rows without an Average cannot be ranked; exclude them explicitly
    # rather than relying on how nlargest() treats NaN, so the empty check
    # below also covers "no model has any score".
    ranked_df = df.dropna(subset=['Average']).nlargest(top_n, 'Average')
    if ranked_df.empty:
        st.write("No models available for the radar chart.")
        return

    axes = list(metric_columns)
    radar_data = ranked_df[['Model'] + axes].set_index('Model')

    fig = go.Figure()

    # One closed polygon per model, one vertex per metric.
    for model_name, row in radar_data.iterrows():
        fig.add_trace(go.Scatterpolar(
            r=row.values,
            theta=axes,
            fill='toself',
            name=model_name,
        ))

    # Dynamic range with 20% headroom above the largest plotted value.
    # If every metric is NaN there is no usable maximum, so leave the axis
    # on its default range instead of passing NaN to Plotly.
    radial_axis = dict(visible=True)
    peak = radar_data.max().max()
    if pd.notna(peak):
        radial_axis['range'] = [0, 1.2 * peak]

    fig.update_layout(
        polar=dict(radialaxis=radial_axis),
        showlegend=True,
    )

    st.plotly_chart(fig, use_container_width=True, key="radar_chart")
|
82 |
|
83 |
|
84 |
def main():
|
85 |
st.set_page_config(page_title="LLM Leaderboard", layout="wide")
|
86 |
+
st.title("🏆 SLM Leaderboard")
|
87 |
+
st.markdown("We record Nous benchmark results for various SLMs. Please submit a PR to this [repo](https://github.com/amazon-science/aws-research-science/tree/main/SLMleaderboard) to inlcude your model! Heavily Inspired by [YALB](https://huggingface.co/spaces/mlabonne/Yet_Another_LLM_Leaderboard) ")
|
88 |
|
89 |
# URL to your markdown file
|
90 |
+
md_url = st.text_input("This the default location of the bechmarks and can be changed",
|
91 |
+
"https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md")
|
92 |
|
93 |
+
|
94 |
+
st.markdown("""
|
95 |
+
Copy the following links into the textbox above and refresh dashboard:
|
96 |
+
|
97 |
+
- [Nous benchmark results](https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/nous.md)
|
98 |
+
- [Standard LLM benchmarks](https://raw.githubusercontent.com/amazon-science/aws-research-science/refs/heads/main/SLMleaderboard/standard.md) """)
|
99 |
+
|
100 |
+
|
101 |
+
|
102 |
if not md_url:
|
103 |
st.error("Please provide a valid URL to a markdown file containing the leaderboard table.")
|
104 |
return
|
|
|
117 |
for col in metric_columns:
|
118 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
119 |
|
120 |
+
# Calculate "Average" score as a new column (exclude 'Model' and 'URL')
|
121 |
+
df['Average'] = df[metric_columns].mean(axis=1, skipna=True)
|
122 |
+
if 'Average' not in metric_columns:
|
123 |
+
metric_columns.append('Average')
|
124 |
+
|
125 |
+
# Dropdown to select color map
|
126 |
+
color_map = st.selectbox("Select Color Map for Bar Charts", options=['Inferno', 'Viridis', 'Cividis'])
|
127 |
+
|
128 |
# Sortable leaderboard table
|
129 |
st.dataframe(
|
130 |
df[['Model'] + metric_columns + ['URL']],
|
|
|
133 |
)
|
134 |
|
135 |
# Bar charts for each metric
|
136 |
+
for i, metric in enumerate(metric_columns):
|
137 |
+
create_bar_chart(df, metric, color_map, key_suffix=i)
|
138 |
+
|
139 |
+
# Extra bar chart for the "Average" score
|
140 |
+
create_bar_chart(df, 'Average', color_map, key_suffix="average")
|
141 |
+
|
142 |
+
# Radar chart for the top 10 models by "Average" score
|
143 |
+
create_radar_chart(df, metric_columns)
|
144 |
|
145 |
except Exception as e:
|
146 |
st.error(f"An error occurred while processing the markdown table: {e}")
|
|
|
148 |
|
149 |
if __name__ == "__main__":
|
150 |
main()
|
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
requests
|
4 |
+
plotly
|
5 |
+
|