Spaces:
Running
Running
Kang Suhyun
committed on
[#106] Display only one leaderboard table based on filter (#107)
Browse files
* [#106] Display only one leaderboard table based on filter
This change updates the leaderboard display behavior to show only one table at a time.
If the language filter is set to 'Any', the original, unfiltered leaderboard is displayed.
If a specific language is selected, a filtered leaderboard is shown.
* update
* update
* review
- app.py +3 -3
- leaderboard.py +89 -94
app.py
CHANGED
@@ -10,7 +10,7 @@ import lingua
|
|
10 |
|
11 |
from leaderboard import build_leaderboard
|
12 |
from leaderboard import db
|
13 |
-
from leaderboard import
|
14 |
from model import check_models
|
15 |
from model import supported_models
|
16 |
import response
|
@@ -87,14 +87,14 @@ with gr.Blocks(title="Arena", css=css) as app:
|
|
87 |
info="The chosen category determines the instruction sent to the LLMs.")
|
88 |
|
89 |
source_language = gr.Dropdown(
|
90 |
-
choices=
|
91 |
value="English",
|
92 |
label="Source language",
|
93 |
info="Choose the source language for translation.",
|
94 |
interactive=True,
|
95 |
visible=False)
|
96 |
target_language = gr.Dropdown(
|
97 |
-
choices=
|
98 |
value="Spanish",
|
99 |
label="Target language",
|
100 |
info="Choose the target language for translation.",
|
|
|
10 |
|
11 |
from leaderboard import build_leaderboard
|
12 |
from leaderboard import db
|
13 |
+
from leaderboard import SUPPORTED_LANGUAGES
|
14 |
from model import check_models
|
15 |
from model import supported_models
|
16 |
import response
|
|
|
87 |
info="The chosen category determines the instruction sent to the LLMs.")
|
88 |
|
89 |
source_language = gr.Dropdown(
|
90 |
+
choices=SUPPORTED_LANGUAGES,
|
91 |
value="English",
|
92 |
label="Source language",
|
93 |
info="Choose the source language for translation.",
|
94 |
interactive=True,
|
95 |
visible=False)
|
96 |
target_language = gr.Dropdown(
|
97 |
+
choices=SUPPORTED_LANGUAGES,
|
98 |
value="Spanish",
|
99 |
label="Target language",
|
100 |
info="Choose the target language for translation.",
|
leaderboard.py
CHANGED
@@ -5,6 +5,7 @@ It provides a leaderboard component.
|
|
5 |
from collections import defaultdict
|
6 |
import enum
|
7 |
import math
|
|
|
8 |
|
9 |
import firebase_admin
|
10 |
from firebase_admin import credentials
|
@@ -20,7 +21,7 @@ if gr.NO_RELOAD:
|
|
20 |
firebase_admin.initialize_app(credentials.Certificate(get_credentials_json()))
|
21 |
db = firestore.client()
|
22 |
|
23 |
-
|
24 |
language.name.capitalize() for language in lingua.Language.all()
|
25 |
]
|
26 |
|
@@ -59,7 +60,7 @@ def get_docs(tab: str,
|
|
59 |
if tab == LeaderboardTab.SUMMARIZATION:
|
60 |
collection = db.collection("arena-summarizations").order_by("timestamp")
|
61 |
|
62 |
-
if summary_lang:
|
63 |
collection = collection.where(filter=base_query.FieldFilter(
|
64 |
"model_a_response_language", "==", summary_lang.lower())).where(
|
65 |
filter=base_query.FieldFilter("model_b_response_language", "==",
|
@@ -121,12 +122,6 @@ def load_elo_ratings(tab,
|
|
121 |
LEADERBOARD_UPDATE_INTERVAL = 600 # 10 minutes
|
122 |
LEADERBOARD_INFO = "The leaderboard is updated every 10 minutes."
|
123 |
|
124 |
-
DEFAULT_FILTER_OPTIONS = {
|
125 |
-
"summary_language": lingua.Language.ENGLISH.name.capitalize(),
|
126 |
-
"source_language": ANY_LANGUAGE,
|
127 |
-
"target_language": lingua.Language.ENGLISH.name.capitalize()
|
128 |
-
}
|
129 |
-
|
130 |
|
131 |
def update_filtered_leaderboard(tab, summary_lang: str, source_lang: str,
|
132 |
target_lang: str):
|
@@ -136,94 +131,94 @@ def update_filtered_leaderboard(tab, summary_lang: str, source_lang: str,
|
|
136 |
|
137 |
def build_leaderboard():
|
138 |
with gr.Tabs():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
with gr.Tab(LeaderboardTab.SUMMARIZATION.value):
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
summary_language.change(fn=update_filtered_leaderboard,
|
161 |
-
inputs=[
|
162 |
-
gr.State(LeaderboardTab.SUMMARIZATION),
|
163 |
-
summary_language,
|
164 |
-
gr.State(),
|
165 |
-
gr.State()
|
166 |
-
],
|
167 |
-
outputs=filtered_summarization)
|
168 |
-
|
169 |
-
gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
|
170 |
-
datatype=["number", "str", "number"],
|
171 |
-
value=lambda: load_elo_ratings(LeaderboardTab.SUMMARIZATION),
|
172 |
-
every=LEADERBOARD_UPDATE_INTERVAL,
|
173 |
-
elem_classes="leaderboard")
|
174 |
gr.Markdown(LEADERBOARD_INFO)
|
175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
with gr.Tab(LeaderboardTab.TRANSLATION.value):
|
177 |
-
with gr.
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
gr.State(), source_language,
|
204 |
-
target_language
|
205 |
-
],
|
206 |
-
outputs=filtered_translation)
|
207 |
-
target_language.change(fn=update_filtered_leaderboard,
|
208 |
-
inputs=[
|
209 |
-
gr.State(LeaderboardTab.TRANSLATION),
|
210 |
-
gr.State(), source_language,
|
211 |
-
target_language
|
212 |
-
],
|
213 |
-
outputs=filtered_translation)
|
214 |
-
|
215 |
-
# When filter options are changed, the accordion keeps closed.
|
216 |
-
# To avoid this, we open the accordion when the filter options are changed.
|
217 |
-
summary_language.change(fn=lambda: gr.Accordion(open=True),
|
218 |
-
outputs=summarization_filter)
|
219 |
-
source_language.change(fn=lambda: gr.Accordion(open=True),
|
220 |
-
outputs=translation_filter)
|
221 |
-
target_language.change(fn=lambda: gr.Accordion(open=True),
|
222 |
-
outputs=translation_filter)
|
223 |
-
|
224 |
-
gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
|
225 |
-
datatype=["number", "str", "number"],
|
226 |
-
value=lambda: load_elo_ratings(LeaderboardTab.TRANSLATION),
|
227 |
-
every=LEADERBOARD_UPDATE_INTERVAL,
|
228 |
-
elem_classes="leaderboard")
|
229 |
gr.Markdown(LEADERBOARD_INFO)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
from collections import defaultdict
|
6 |
import enum
|
7 |
import math
|
8 |
+
from typing import Tuple
|
9 |
|
10 |
import firebase_admin
|
11 |
from firebase_admin import credentials
|
|
|
21 |
firebase_admin.initialize_app(credentials.Certificate(get_credentials_json()))
|
22 |
db = firestore.client()
|
23 |
|
24 |
+
SUPPORTED_LANGUAGES = [
|
25 |
language.name.capitalize() for language in lingua.Language.all()
|
26 |
]
|
27 |
|
|
|
60 |
if tab == LeaderboardTab.SUMMARIZATION:
|
61 |
collection = db.collection("arena-summarizations").order_by("timestamp")
|
62 |
|
63 |
+
if summary_lang and (not summary_lang == ANY_LANGUAGE):
|
64 |
collection = collection.where(filter=base_query.FieldFilter(
|
65 |
"model_a_response_language", "==", summary_lang.lower())).where(
|
66 |
filter=base_query.FieldFilter("model_b_response_language", "==",
|
|
|
122 |
LEADERBOARD_UPDATE_INTERVAL = 600 # 10 minutes
|
123 |
LEADERBOARD_INFO = "The leaderboard is updated every 10 minutes."
|
124 |
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
def update_filtered_leaderboard(tab, summary_lang: str, source_lang: str,
|
127 |
target_lang: str):
|
|
|
131 |
|
132 |
def build_leaderboard():
|
133 |
with gr.Tabs():
|
134 |
+
|
135 |
+
# Returns (original leaderboard, filtered leaderboard).
|
136 |
+
def toggle_leaderboard(language: str) -> Tuple[gr.Dataframe, gr.Dataframe]:
|
137 |
+
filter_chosen = language != ANY_LANGUAGE
|
138 |
+
return gr.Dataframe(visible=not filter_chosen), gr.Dataframe(
|
139 |
+
visible=filter_chosen)
|
140 |
+
|
141 |
with gr.Tab(LeaderboardTab.SUMMARIZATION.value):
|
142 |
+
summary_language = gr.Dropdown(choices=SUPPORTED_LANGUAGES +
|
143 |
+
[ANY_LANGUAGE],
|
144 |
+
value=ANY_LANGUAGE,
|
145 |
+
label="Summary language",
|
146 |
+
interactive=True)
|
147 |
+
|
148 |
+
filtered_summarization = gr.DataFrame(
|
149 |
+
headers=["Rank", "Model", "Elo rating"],
|
150 |
+
datatype=["number", "str", "number"],
|
151 |
+
value=lambda: load_elo_ratings(LeaderboardTab.SUMMARIZATION,
|
152 |
+
ANY_LANGUAGE),
|
153 |
+
elem_classes="leaderboard",
|
154 |
+
visible=False)
|
155 |
+
|
156 |
+
original_summarization = gr.Dataframe(
|
157 |
+
headers=["Rank", "Model", "Elo rating"],
|
158 |
+
datatype=["number", "str", "number"],
|
159 |
+
value=lambda: load_elo_ratings(LeaderboardTab.SUMMARIZATION),
|
160 |
+
every=LEADERBOARD_UPDATE_INTERVAL,
|
161 |
+
elem_classes="leaderboard")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
gr.Markdown(LEADERBOARD_INFO)
|
163 |
|
164 |
+
summary_language.change(
|
165 |
+
fn=update_filtered_leaderboard,
|
166 |
+
inputs=[
|
167 |
+
gr.State(LeaderboardTab.SUMMARIZATION), summary_language,
|
168 |
+
gr.State(None),
|
169 |
+
gr.State(None)
|
170 |
+
],
|
171 |
+
outputs=filtered_summarization).then(
|
172 |
+
fn=toggle_leaderboard,
|
173 |
+
inputs=summary_language,
|
174 |
+
outputs=[original_summarization, filtered_summarization])
|
175 |
+
|
176 |
with gr.Tab(LeaderboardTab.TRANSLATION.value):
|
177 |
+
with gr.Row():
|
178 |
+
source_language = gr.Dropdown(choices=SUPPORTED_LANGUAGES +
|
179 |
+
[ANY_LANGUAGE],
|
180 |
+
label="Source language",
|
181 |
+
value=ANY_LANGUAGE,
|
182 |
+
interactive=True)
|
183 |
+
target_language = gr.Dropdown(choices=SUPPORTED_LANGUAGES +
|
184 |
+
[ANY_LANGUAGE],
|
185 |
+
label="Target language",
|
186 |
+
value=ANY_LANGUAGE,
|
187 |
+
interactive=True)
|
188 |
+
|
189 |
+
filtered_translation = gr.DataFrame(
|
190 |
+
headers=["Rank", "Model", "Elo rating"],
|
191 |
+
datatype=["number", "str", "number"],
|
192 |
+
value=lambda: load_elo_ratings(LeaderboardTab.TRANSLATION,
|
193 |
+
ANY_LANGUAGE, ANY_LANGUAGE),
|
194 |
+
elem_classes="leaderboard",
|
195 |
+
visible=False)
|
196 |
+
|
197 |
+
original_translation = gr.Dataframe(
|
198 |
+
headers=["Rank", "Model", "Elo rating"],
|
199 |
+
datatype=["number", "str", "number"],
|
200 |
+
value=lambda: load_elo_ratings(LeaderboardTab.TRANSLATION),
|
201 |
+
every=LEADERBOARD_UPDATE_INTERVAL,
|
202 |
+
elem_classes="leaderboard")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
gr.Markdown(LEADERBOARD_INFO)
|
204 |
+
|
205 |
+
source_language.change(
|
206 |
+
fn=update_filtered_leaderboard,
|
207 |
+
inputs=[
|
208 |
+
gr.State(LeaderboardTab.TRANSLATION),
|
209 |
+
gr.State(None), source_language, target_language
|
210 |
+
],
|
211 |
+
outputs=filtered_translation).then(
|
212 |
+
fn=toggle_leaderboard,
|
213 |
+
inputs=source_language,
|
214 |
+
outputs=[original_translation, filtered_translation])
|
215 |
+
target_language.change(
|
216 |
+
fn=update_filtered_leaderboard,
|
217 |
+
inputs=[
|
218 |
+
gr.State(LeaderboardTab.TRANSLATION),
|
219 |
+
gr.State(None), source_language, target_language
|
220 |
+
],
|
221 |
+
outputs=filtered_translation).then(
|
222 |
+
fn=toggle_leaderboard,
|
223 |
+
inputs=target_language,
|
224 |
+
outputs=[original_translation, filtered_translation])
|