Spaces:
Runtime error
Runtime error
Commit
·
460ecf2
1
Parent(s):
2762eff
nathan-flagged-models-vis (#478)
Browse files
- Adds a way to hide flagged models (a69dfa979897081c10a30f1be9937a917d93422b)
- remove unused pprint import (1be35c2d1ffffab552d9d65f826930e4f9f1c273)
- remove unused pprint import (6adc61160db982ce023039472b8842d21584b367)
- app.py +14 -3
- src/display/utils.py +2 -0
- src/leaderboard/filter_models.py +14 -0
app.py
CHANGED
@@ -78,9 +78,10 @@ def update_table(
|
|
78 |
precision_query: str,
|
79 |
size_query: list,
|
80 |
show_deleted: bool,
|
|
|
81 |
query: str,
|
82 |
):
|
83 |
-
filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted)
|
84 |
filtered_df = filter_queries(query, filtered_df)
|
85 |
df = select_columns(filtered_df, columns)
|
86 |
return df
|
@@ -128,7 +129,7 @@ def filter_queries(query: str, filtered_df: pd.DataFrame):
|
|
128 |
|
129 |
|
130 |
def filter_models(
|
131 |
-
df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool
|
132 |
) -> pd.DataFrame:
|
133 |
# Show all models
|
134 |
if show_deleted:
|
@@ -136,6 +137,9 @@ def filter_models(
|
|
136 |
else: # Show only still on the hub models
|
137 |
filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
|
138 |
|
|
|
|
|
|
|
139 |
type_emoji = [t[0] for t in type_query]
|
140 |
filtered_df = filtered_df.loc[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
|
141 |
filtered_df = filtered_df.loc[df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]
|
@@ -147,6 +151,7 @@ def filter_models(
|
|
147 |
|
148 |
return filtered_df
|
149 |
|
|
|
150 |
|
151 |
demo = gr.Blocks(css=custom_css)
|
152 |
with demo:
|
@@ -183,6 +188,9 @@ with demo:
|
|
183 |
deleted_models_visibility = gr.Checkbox(
|
184 |
value=False, label="Show private/deleted models", interactive=True
|
185 |
)
|
|
|
|
|
|
|
186 |
with gr.Column(min_width=320):
|
187 |
#with gr.Box(elem_id="box-filter"):
|
188 |
filter_columns_type = gr.CheckboxGroup(
|
@@ -237,6 +245,7 @@ with demo:
|
|
237 |
filter_columns_precision,
|
238 |
filter_columns_size,
|
239 |
deleted_models_visibility,
|
|
|
240 |
search_bar,
|
241 |
],
|
242 |
leaderboard_table,
|
@@ -253,6 +262,7 @@ with demo:
|
|
253 |
filter_columns_precision,
|
254 |
filter_columns_size,
|
255 |
deleted_models_visibility,
|
|
|
256 |
search_bar,
|
257 |
],
|
258 |
leaderboard_table,
|
@@ -260,7 +270,7 @@ with demo:
|
|
260 |
# Check query parameter once at startup and update search bar + hidden component
|
261 |
demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
|
262 |
|
263 |
-
for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, deleted_models_visibility]:
|
264 |
selector.change(
|
265 |
update_table,
|
266 |
[
|
@@ -270,6 +280,7 @@ with demo:
|
|
270 |
filter_columns_precision,
|
271 |
filter_columns_size,
|
272 |
deleted_models_visibility,
|
|
|
273 |
search_bar,
|
274 |
],
|
275 |
leaderboard_table,
|
|
|
78 |
precision_query: str,
|
79 |
size_query: list,
|
80 |
show_deleted: bool,
|
81 |
+
show_flagged: bool,
|
82 |
query: str,
|
83 |
):
|
84 |
+
filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted, show_flagged)
|
85 |
filtered_df = filter_queries(query, filtered_df)
|
86 |
df = select_columns(filtered_df, columns)
|
87 |
return df
|
|
|
129 |
|
130 |
|
131 |
def filter_models(
|
132 |
+
df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool, show_flagged: bool
|
133 |
) -> pd.DataFrame:
|
134 |
# Show all models
|
135 |
if show_deleted:
|
|
|
137 |
else: # Show only still on the hub models
|
138 |
filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
|
139 |
|
140 |
+
if not show_flagged:
|
141 |
+
filtered_df = filtered_df[filtered_df[AutoEvalColumn.flagged.name] == False]
|
142 |
+
|
143 |
type_emoji = [t[0] for t in type_query]
|
144 |
filtered_df = filtered_df.loc[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
|
145 |
filtered_df = filtered_df.loc[df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]
|
|
|
151 |
|
152 |
return filtered_df
|
153 |
|
154 |
+
leaderboard_df = filter_models(leaderboard_df, [t.to_str(" : ") for t in ModelType], list(NUMERIC_INTERVALS.keys()), [i.value.name for i in Precision], False, False)
|
155 |
|
156 |
demo = gr.Blocks(css=custom_css)
|
157 |
with demo:
|
|
|
188 |
deleted_models_visibility = gr.Checkbox(
|
189 |
value=False, label="Show private/deleted models", interactive=True
|
190 |
)
|
191 |
+
flagged_models_visibility = gr.Checkbox(
|
192 |
+
value=False, label="Show flagged models", interactive=True
|
193 |
+
)
|
194 |
with gr.Column(min_width=320):
|
195 |
#with gr.Box(elem_id="box-filter"):
|
196 |
filter_columns_type = gr.CheckboxGroup(
|
|
|
245 |
filter_columns_precision,
|
246 |
filter_columns_size,
|
247 |
deleted_models_visibility,
|
248 |
+
flagged_models_visibility,
|
249 |
search_bar,
|
250 |
],
|
251 |
leaderboard_table,
|
|
|
262 |
filter_columns_precision,
|
263 |
filter_columns_size,
|
264 |
deleted_models_visibility,
|
265 |
+
flagged_models_visibility,
|
266 |
search_bar,
|
267 |
],
|
268 |
leaderboard_table,
|
|
|
270 |
# Check query parameter once at startup and update search bar + hidden component
|
271 |
demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
|
272 |
|
273 |
+
for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, deleted_models_visibility, flagged_models_visibility]:
|
274 |
selector.change(
|
275 |
update_table,
|
276 |
[
|
|
|
280 |
filter_columns_precision,
|
281 |
filter_columns_size,
|
282 |
deleted_models_visibility,
|
283 |
+
flagged_models_visibility,
|
284 |
search_bar,
|
285 |
],
|
286 |
leaderboard_table,
|
src/display/utils.py
CHANGED
@@ -51,6 +51,7 @@ auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B
|
|
51 |
auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
|
52 |
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
53 |
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
|
|
54 |
# Dummy column for the search bar (hidden by the custom CSS)
|
55 |
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
56 |
|
@@ -80,6 +81,7 @@ baseline_row = {
|
|
80 |
AutoEvalColumn.gsm8k.name: 0.21,
|
81 |
AutoEvalColumn.dummy.name: "baseline",
|
82 |
AutoEvalColumn.model_type.name: "",
|
|
|
83 |
}
|
84 |
|
85 |
# Average ⬆️ human baseline is 0.897 (source: averaging human baselines below)
|
|
|
51 |
auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
|
52 |
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
53 |
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
54 |
+
auto_eval_column_dict.append(["flagged", ColumnContent, ColumnContent("Flagged", "bool", False, False)])
|
55 |
# Dummy column for the search bar (hidden by the custom CSS)
|
56 |
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
57 |
|
|
|
81 |
AutoEvalColumn.gsm8k.name: 0.21,
|
82 |
AutoEvalColumn.dummy.name: "baseline",
|
83 |
AutoEvalColumn.model_type.name: "",
|
84 |
+
AutoEvalColumn.flagged.name: False,
|
85 |
}
|
86 |
|
87 |
# Average ⬆️ human baseline is 0.897 (source: averaging human baselines below)
|
src/leaderboard/filter_models.py
CHANGED
@@ -14,6 +14,17 @@ FLAGGED_MODELS = {
|
|
14 |
"AIDC-ai-business/Marcoroni-13B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/287",
|
15 |
"AIDC-ai-business/Marcoroni-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/287",
|
16 |
"fblgit/una-xaberius-34b-v1beta": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/444",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
}
|
18 |
|
19 |
# Models which have been requested by orgs to not be submitted on the leaderboard
|
@@ -36,6 +47,9 @@ def flag_models(leaderboard_data: list[dict]):
|
|
36 |
model_data[
|
37 |
AutoEvalColumn.model.name
|
38 |
] = f"{model_data[AutoEvalColumn.model.name]} has been flagged! {issue_link}"
|
|
|
|
|
|
|
39 |
|
40 |
|
41 |
def remove_forbidden_models(leaderboard_data: list[dict]):
|
|
|
14 |
"AIDC-ai-business/Marcoroni-13B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/287",
|
15 |
"AIDC-ai-business/Marcoroni-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/287",
|
16 |
"fblgit/una-xaberius-34b-v1beta": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/444",
|
17 |
+
"jan-hq/trinity-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
|
18 |
+
"rwitz2/go-bruins-v2.1.1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
|
19 |
+
"rwitz2/go-bruins-v2.1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
|
20 |
+
"GreenNode/GreenNodeLM-v3olet-7B": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
|
21 |
+
"GreenNode/GreenNodeLM-7B-v4leo": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
|
22 |
+
"GreenNode/LeoScorpius-GreenNode-7B-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
|
23 |
+
"viethq188/LeoScorpius-7B-Chat-DPO": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
|
24 |
+
"GreenNode/GreenNodeLM-7B-v2leo": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
|
25 |
+
"janai-hq/trinity-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
|
26 |
+
"ignos/LeoScorpius-GreenNode-Alpaca-7B-v1": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
|
27 |
+
"fblgit/una-cybertron-7b-v3-OMA": "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/474",
|
28 |
}
|
29 |
|
30 |
# Models which have been requested by orgs to not be submitted on the leaderboard
|
|
|
47 |
model_data[
|
48 |
AutoEvalColumn.model.name
|
49 |
] = f"{model_data[AutoEvalColumn.model.name]} has been flagged! {issue_link}"
|
50 |
+
model_data[AutoEvalColumn.flagged.name] = True
|
51 |
+
else:
|
52 |
+
model_data[AutoEvalColumn.flagged.name] = False
|
53 |
|
54 |
|
55 |
def remove_forbidden_models(leaderboard_data: list[dict]):
|