Spaces:
Running
Running
Kang Suhyun
suhyun.kang
committed on
[#9] Add language filter options to translation leaderboard (#51)
Browse files
* [#9] Add language filter options to translation leaderboard
This change adds the ability to filter the translation leaderboard by language.
The user can select a source language and a target language, and the leaderboard will display the scores for that language pair.
Optimization is not considered in this change.
Screenshot: https://screen.yanolja.in/avEVnBkaotnqunIh.png
* Remove Apply button
---------
Co-authored-by: suhyun.kang <[email protected]>
- app.py +1 -4
- leaderboard.py +61 -13
app.py
CHANGED
@@ -9,13 +9,10 @@ import gradio as gr
|
|
9 |
|
10 |
from leaderboard import build_leaderboard
|
11 |
from leaderboard import db
|
|
|
12 |
import response
|
13 |
from response import get_responses
|
14 |
|
15 |
-
SUPPORTED_TRANSLATION_LANGUAGES = [
|
16 |
-
"Korean", "English", "Chinese", "Japanese", "Spanish", "French"
|
17 |
-
]
|
18 |
-
|
19 |
|
20 |
class VoteOptions(enum.Enum):
|
21 |
MODEL_A = "Model A is better"
|
|
|
9 |
|
10 |
from leaderboard import build_leaderboard
|
11 |
from leaderboard import db
|
12 |
+
from leaderboard import SUPPORTED_TRANSLATION_LANGUAGES
|
13 |
import response
|
14 |
from response import get_responses
|
15 |
|
|
|
|
|
|
|
|
|
16 |
|
17 |
class VoteOptions(enum.Enum):
|
18 |
MODEL_A = "Model A is better"
|
leaderboard.py
CHANGED
@@ -9,6 +9,7 @@ import math
|
|
9 |
import firebase_admin
|
10 |
from firebase_admin import credentials
|
11 |
from firebase_admin import firestore
|
|
|
12 |
import gradio as gr
|
13 |
import pandas as pd
|
14 |
|
@@ -18,6 +19,10 @@ from credentials import get_credentials_json
|
|
18 |
firebase_admin.initialize_app(credentials.Certificate(get_credentials_json()))
|
19 |
db = firestore.client()
|
20 |
|
|
|
|
|
|
|
|
|
21 |
|
22 |
class LeaderboardTab(enum.Enum):
|
23 |
SUMMARIZATION = "Summarization"
|
@@ -44,16 +49,26 @@ def compute_elo(battles, k=4, scale=400, base=10, initial_rating=1000):
|
|
44 |
return rating
|
45 |
|
46 |
|
47 |
-
def get_docs(tab):
|
48 |
if tab == LeaderboardTab.SUMMARIZATION:
|
49 |
return db.collection("arena-summarizations").order_by("timestamp").stream()
|
50 |
|
51 |
if tab == LeaderboardTab.TRANSLATION:
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
|
|
54 |
|
55 |
-
|
56 |
-
|
|
|
57 |
|
58 |
battles = []
|
59 |
for doc in docs:
|
@@ -75,16 +90,27 @@ def load_elo_ratings(tab):
|
|
75 |
for i, (model, rating) in enumerate(sorted_ratings)]
|
76 |
|
77 |
|
78 |
-
|
79 |
-
|
80 |
|
|
|
|
|
|
|
|
|
81 |
|
82 |
-
|
83 |
-
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
|
86 |
-
|
87 |
-
|
|
|
|
|
88 |
|
89 |
|
90 |
def build_leaderboard():
|
@@ -92,16 +118,38 @@ def build_leaderboard():
|
|
92 |
with gr.Tab(LeaderboardTab.SUMMARIZATION.value):
|
93 |
gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
|
94 |
datatype=["number", "str", "number"],
|
95 |
-
value=
|
96 |
every=LEADERBOARD_UPDATE_INTERVAL,
|
97 |
elem_classes="leaderboard")
|
98 |
gr.Markdown(LEADERBOARD_INFO)
|
99 |
|
100 |
-
# TODO(#9): Add language filter options.
|
101 |
with gr.Tab(LeaderboardTab.TRANSLATION.value):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
|
103 |
datatype=["number", "str", "number"],
|
104 |
-
value=
|
105 |
every=LEADERBOARD_UPDATE_INTERVAL,
|
106 |
elem_classes="leaderboard")
|
107 |
gr.Markdown(LEADERBOARD_INFO)
|
|
|
9 |
import firebase_admin
|
10 |
from firebase_admin import credentials
|
11 |
from firebase_admin import firestore
|
12 |
+
from google.cloud.firestore_v1 import base_query
|
13 |
import gradio as gr
|
14 |
import pandas as pd
|
15 |
|
|
|
19 |
firebase_admin.initialize_app(credentials.Certificate(get_credentials_json()))
|
20 |
db = firestore.client()
|
21 |
|
22 |
+
SUPPORTED_TRANSLATION_LANGUAGES = [
|
23 |
+
"Korean", "English", "Chinese", "Japanese", "Spanish", "French"
|
24 |
+
]
|
25 |
+
|
26 |
|
27 |
class LeaderboardTab(enum.Enum):
|
28 |
SUMMARIZATION = "Summarization"
|
|
|
49 |
return rating
|
50 |
|
51 |
|
52 |
+
def get_docs(tab: str, source_lang: str = None, target_lang: str = None):
|
53 |
if tab == LeaderboardTab.SUMMARIZATION:
|
54 |
return db.collection("arena-summarizations").order_by("timestamp").stream()
|
55 |
|
56 |
if tab == LeaderboardTab.TRANSLATION:
|
57 |
+
collection = db.collection("arena-translations").order_by("timestamp")
|
58 |
+
|
59 |
+
if source_lang:
|
60 |
+
collection = collection.where(filter=base_query.FieldFilter(
|
61 |
+
"source_language", "==", source_lang.lower()))
|
62 |
+
|
63 |
+
if target_lang:
|
64 |
+
collection = collection.where(filter=base_query.FieldFilter(
|
65 |
+
"target_language", "==", target_lang.lower()))
|
66 |
|
67 |
+
return collection.stream()
|
68 |
|
69 |
+
|
70 |
+
def load_elo_ratings(tab, source_lang: str = None, target_lang: str = None):
|
71 |
+
docs = get_docs(tab, source_lang, target_lang)
|
72 |
|
73 |
battles = []
|
74 |
for doc in docs:
|
|
|
90 |
for i, (model, rating) in enumerate(sorted_ratings)]
|
91 |
|
92 |
|
93 |
+
LEADERBOARD_UPDATE_INTERVAL = 600 # 10 minutes
|
94 |
+
LEADERBOARD_INFO = "The leaderboard is updated every 10 minutes."
|
95 |
|
96 |
+
DEFAULT_FILTER_OPTIONS = {
|
97 |
+
"source_language": "English",
|
98 |
+
"target_language": "Spanish"
|
99 |
+
}
|
100 |
|
101 |
+
filtered_dataframe = gr.DataFrame(
|
102 |
+
headers=["Rank", "Model", "Elo rating"],
|
103 |
+
datatype=["number", "str", "number"],
|
104 |
+
value=lambda: load_elo_ratings(
|
105 |
+
LeaderboardTab.TRANSLATION, DEFAULT_FILTER_OPTIONS[
|
106 |
+
"source_language"], DEFAULT_FILTER_OPTIONS["target_language"]),
|
107 |
+
elem_classes="leaderboard")
|
108 |
|
109 |
|
110 |
+
def update_filtered_leaderboard(source_lang, target_lang):
|
111 |
+
new_value = load_elo_ratings(LeaderboardTab.TRANSLATION, source_lang,
|
112 |
+
target_lang)
|
113 |
+
return gr.update(value=new_value)
|
114 |
|
115 |
|
116 |
def build_leaderboard():
|
|
|
118 |
with gr.Tab(LeaderboardTab.SUMMARIZATION.value):
|
119 |
gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
|
120 |
datatype=["number", "str", "number"],
|
121 |
+
value=lambda: load_elo_ratings(LeaderboardTab.SUMMARIZATION),
|
122 |
every=LEADERBOARD_UPDATE_INTERVAL,
|
123 |
elem_classes="leaderboard")
|
124 |
gr.Markdown(LEADERBOARD_INFO)
|
125 |
|
|
|
126 |
with gr.Tab(LeaderboardTab.TRANSLATION.value):
|
127 |
+
with gr.Accordion("Filter", open=False):
|
128 |
+
with gr.Row():
|
129 |
+
source_language = gr.Dropdown(
|
130 |
+
choices=SUPPORTED_TRANSLATION_LANGUAGES,
|
131 |
+
label="Source language",
|
132 |
+
value=DEFAULT_FILTER_OPTIONS["source_language"],
|
133 |
+
interactive=True)
|
134 |
+
target_language = gr.Dropdown(
|
135 |
+
choices=SUPPORTED_TRANSLATION_LANGUAGES,
|
136 |
+
label="Target language",
|
137 |
+
value=DEFAULT_FILTER_OPTIONS["target_language"],
|
138 |
+
interactive=True)
|
139 |
+
|
140 |
+
source_language.change(fn=update_filtered_leaderboard,
|
141 |
+
inputs=[source_language, target_language],
|
142 |
+
outputs=filtered_dataframe)
|
143 |
+
target_language.change(fn=update_filtered_leaderboard,
|
144 |
+
inputs=[source_language, target_language],
|
145 |
+
outputs=filtered_dataframe)
|
146 |
+
|
147 |
+
with gr.Row():
|
148 |
+
filtered_dataframe.render()
|
149 |
+
|
150 |
gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
|
151 |
datatype=["number", "str", "number"],
|
152 |
+
value=lambda: load_elo_ratings(LeaderboardTab.TRANSLATION),
|
153 |
every=LEADERBOARD_UPDATE_INTERVAL,
|
154 |
elem_classes="leaderboard")
|
155 |
gr.Markdown(LEADERBOARD_INFO)
|