Spaces:
Running
Running
Kang Suhyun
committed on
[#57] Store language information in summarization (#61)
Browse files
* [#57] Store language information in summarization
Changes:
- Store language information in summarization battle
- Add language filter to the summarization leaderboard
* Remove Apply button
* Update
- app.py +8 -0
- leaderboard.py +75 -23
- requirements.txt +1 -0
app.py
CHANGED
@@ -6,6 +6,7 @@ from uuid import uuid4
|
|
6 |
|
7 |
from firebase_admin import firestore
|
8 |
import gradio as gr
|
|
|
9 |
|
10 |
from leaderboard import build_leaderboard
|
11 |
from leaderboard import db
|
@@ -13,6 +14,8 @@ from leaderboard import SUPPORTED_TRANSLATION_LANGUAGES
|
|
13 |
import response
|
14 |
from response import get_responses
|
15 |
|
|
|
|
|
16 |
|
17 |
class VoteOptions(enum.Enum):
|
18 |
MODEL_A = "Model A is better"
|
@@ -41,7 +44,12 @@ def vote(vote_button, response_a, response_b, model_a_name, model_b_name,
|
|
41 |
}
|
42 |
|
43 |
if category == response.Category.SUMMARIZE.value:
|
|
|
|
|
|
|
44 |
doc_ref = db.collection("arena-summarizations").document(doc_id)
|
|
|
|
|
45 |
doc_ref.set(doc)
|
46 |
|
47 |
return outputs
|
|
|
6 |
|
7 |
from firebase_admin import firestore
|
8 |
import gradio as gr
|
9 |
+
import lingua
|
10 |
|
11 |
from leaderboard import build_leaderboard
|
12 |
from leaderboard import db
|
|
|
14 |
import response
|
15 |
from response import get_responses
|
16 |
|
17 |
+
detector = lingua.LanguageDetectorBuilder.from_all_languages().build()
|
18 |
+
|
19 |
|
20 |
class VoteOptions(enum.Enum):
|
21 |
MODEL_A = "Model A is better"
|
|
|
44 |
}
|
45 |
|
46 |
if category == response.Category.SUMMARIZE.value:
|
47 |
+
language_a = detector.detect_language_of(response_a)
|
48 |
+
language_b = detector.detect_language_of(response_b)
|
49 |
+
|
50 |
doc_ref = db.collection("arena-summarizations").document(doc_id)
|
51 |
+
doc["model_a_response_language"] = language_a.name.lower()
|
52 |
+
doc["model_b_response_language"] = language_b.name.lower()
|
53 |
doc_ref.set(doc)
|
54 |
|
55 |
return outputs
|
leaderboard.py
CHANGED
@@ -11,6 +11,7 @@ from firebase_admin import credentials
|
|
11 |
from firebase_admin import firestore
|
12 |
from google.cloud.firestore_v1 import base_query
|
13 |
import gradio as gr
|
|
|
14 |
import pandas as pd
|
15 |
|
16 |
from credentials import get_credentials_json
|
@@ -20,7 +21,7 @@ firebase_admin.initialize_app(credentials.Certificate(get_credentials_json()))
|
|
20 |
db = firestore.client()
|
21 |
|
22 |
SUPPORTED_TRANSLATION_LANGUAGES = [
|
23 |
-
|
24 |
]
|
25 |
|
26 |
|
@@ -49,9 +50,20 @@ def compute_elo(battles, k=4, scale=400, base=10, initial_rating=1000):
|
|
49 |
return rating
|
50 |
|
51 |
|
52 |
-
def get_docs(tab: str,
|
|
|
|
|
|
|
53 |
if tab == LeaderboardTab.SUMMARIZATION:
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
if tab == LeaderboardTab.TRANSLATION:
|
57 |
collection = db.collection("arena-translations").order_by("timestamp")
|
@@ -67,8 +79,11 @@ def get_docs(tab: str, source_lang: str = None, target_lang: str = None):
|
|
67 |
return collection.stream()
|
68 |
|
69 |
|
70 |
-
def load_elo_ratings(tab,
|
71 |
-
|
|
|
|
|
|
|
72 |
|
73 |
battles = []
|
74 |
for doc in docs:
|
@@ -94,28 +109,50 @@ LEADERBOARD_UPDATE_INTERVAL = 600 # 10 minutes
|
|
94 |
LEADERBOARD_INFO = "The leaderboard is updated every 10 minutes."
|
95 |
|
96 |
DEFAULT_FILTER_OPTIONS = {
|
|
|
97 |
"source_language": "English",
|
98 |
"target_language": "Spanish"
|
99 |
}
|
100 |
|
101 |
-
filtered_dataframe = gr.DataFrame(
|
102 |
-
headers=["Rank", "Model", "Elo rating"],
|
103 |
-
datatype=["number", "str", "number"],
|
104 |
-
value=lambda: load_elo_ratings(
|
105 |
-
LeaderboardTab.TRANSLATION, DEFAULT_FILTER_OPTIONS[
|
106 |
-
"source_language"], DEFAULT_FILTER_OPTIONS["target_language"]),
|
107 |
-
elem_classes="leaderboard")
|
108 |
|
109 |
-
|
110 |
-
|
111 |
-
new_value = load_elo_ratings(
|
112 |
-
target_lang)
|
113 |
return gr.update(value=new_value)
|
114 |
|
115 |
|
116 |
def build_leaderboard():
|
117 |
with gr.Tabs():
|
118 |
with gr.Tab(LeaderboardTab.SUMMARIZATION.value):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
|
120 |
datatype=["number", "str", "number"],
|
121 |
value=lambda: load_elo_ratings(LeaderboardTab.SUMMARIZATION),
|
@@ -137,15 +174,30 @@ def build_leaderboard():
|
|
137 |
value=DEFAULT_FILTER_OPTIONS["target_language"],
|
138 |
interactive=True)
|
139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
source_language.change(fn=update_filtered_leaderboard,
|
141 |
-
inputs=[
|
142 |
-
|
|
|
|
|
|
|
|
|
143 |
target_language.change(fn=update_filtered_leaderboard,
|
144 |
-
inputs=[
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
|
|
149 |
|
150 |
gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
|
151 |
datatype=["number", "str", "number"],
|
|
|
11 |
from firebase_admin import firestore
|
12 |
from google.cloud.firestore_v1 import base_query
|
13 |
import gradio as gr
|
14 |
+
import lingua
|
15 |
import pandas as pd
|
16 |
|
17 |
from credentials import get_credentials_json
|
|
|
21 |
db = firestore.client()
|
22 |
|
23 |
SUPPORTED_TRANSLATION_LANGUAGES = [
|
24 |
+
language.name.capitalize() for language in lingua.Language.all()
|
25 |
]
|
26 |
|
27 |
|
|
|
50 |
return rating
|
51 |
|
52 |
|
53 |
+
def get_docs(tab: str,
|
54 |
+
summary_lang: str = None,
|
55 |
+
source_lang: str = None,
|
56 |
+
target_lang: str = None):
|
57 |
if tab == LeaderboardTab.SUMMARIZATION:
|
58 |
+
collection = db.collection("arena-summarizations").order_by("timestamp")
|
59 |
+
|
60 |
+
if summary_lang:
|
61 |
+
collection = collection.where(filter=base_query.FieldFilter(
|
62 |
+
"model_a_response_language", "==", summary_lang.lower())).where(
|
63 |
+
filter=base_query.FieldFilter("model_b_response_language", "==",
|
64 |
+
summary_lang.lower()))
|
65 |
+
|
66 |
+
return collection.stream()
|
67 |
|
68 |
if tab == LeaderboardTab.TRANSLATION:
|
69 |
collection = db.collection("arena-translations").order_by("timestamp")
|
|
|
79 |
return collection.stream()
|
80 |
|
81 |
|
82 |
+
def load_elo_ratings(tab,
|
83 |
+
summary_lang: str = None,
|
84 |
+
source_lang: str = None,
|
85 |
+
target_lang: str = None):
|
86 |
+
docs = get_docs(tab, summary_lang, source_lang, target_lang)
|
87 |
|
88 |
battles = []
|
89 |
for doc in docs:
|
|
|
109 |
LEADERBOARD_INFO = "The leaderboard is updated every 10 minutes."
|
110 |
|
111 |
DEFAULT_FILTER_OPTIONS = {
|
112 |
+
"summary_language": "English",
|
113 |
"source_language": "English",
|
114 |
"target_language": "Spanish"
|
115 |
}
|
116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
|
118 |
+
def update_filtered_leaderboard(tab, summary_lang: str, source_lang: str,
|
119 |
+
target_lang: str):
|
120 |
+
new_value = load_elo_ratings(tab, summary_lang, source_lang, target_lang)
|
|
|
121 |
return gr.update(value=new_value)
|
122 |
|
123 |
|
124 |
def build_leaderboard():
|
125 |
with gr.Tabs():
|
126 |
with gr.Tab(LeaderboardTab.SUMMARIZATION.value):
|
127 |
+
with gr.Accordion("Filter", open=False):
|
128 |
+
with gr.Row():
|
129 |
+
languages = [
|
130 |
+
language.name.capitalize() for language in lingua.Language.all()
|
131 |
+
]
|
132 |
+
summary_language = gr.Dropdown(
|
133 |
+
choices=languages,
|
134 |
+
value=DEFAULT_FILTER_OPTIONS["summary_language"],
|
135 |
+
label="Summary language",
|
136 |
+
interactive=True)
|
137 |
+
|
138 |
+
with gr.Row():
|
139 |
+
filtered_summarization = gr.DataFrame(
|
140 |
+
headers=["Rank", "Model", "Elo rating"],
|
141 |
+
datatype=["number", "str", "number"],
|
142 |
+
value=lambda: load_elo_ratings(
|
143 |
+
LeaderboardTab.SUMMARIZATION, DEFAULT_FILTER_OPTIONS[
|
144 |
+
"summary_language"]),
|
145 |
+
elem_classes="leaderboard")
|
146 |
+
|
147 |
+
summary_language.change(fn=update_filtered_leaderboard,
|
148 |
+
inputs=[
|
149 |
+
gr.State(LeaderboardTab.SUMMARIZATION),
|
150 |
+
summary_language,
|
151 |
+
gr.State(),
|
152 |
+
gr.State()
|
153 |
+
],
|
154 |
+
outputs=filtered_summarization)
|
155 |
+
|
156 |
gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
|
157 |
datatype=["number", "str", "number"],
|
158 |
value=lambda: load_elo_ratings(LeaderboardTab.SUMMARIZATION),
|
|
|
174 |
value=DEFAULT_FILTER_OPTIONS["target_language"],
|
175 |
interactive=True)
|
176 |
|
177 |
+
with gr.Row():
|
178 |
+
filtered_translation = gr.DataFrame(
|
179 |
+
headers=["Rank", "Model", "Elo rating"],
|
180 |
+
datatype=["number", "str", "number"],
|
181 |
+
value=lambda: load_elo_ratings(
|
182 |
+
LeaderboardTab.TRANSLATION, DEFAULT_FILTER_OPTIONS[
|
183 |
+
"source_language"], DEFAULT_FILTER_OPTIONS[
|
184 |
+
"target_language"]),
|
185 |
+
elem_classes="leaderboard")
|
186 |
+
|
187 |
source_language.change(fn=update_filtered_leaderboard,
|
188 |
+
inputs=[
|
189 |
+
gr.State(LeaderboardTab.TRANSLATION),
|
190 |
+
gr.State(), source_language,
|
191 |
+
target_language
|
192 |
+
],
|
193 |
+
outputs=filtered_translation)
|
194 |
target_language.change(fn=update_filtered_leaderboard,
|
195 |
+
inputs=[
|
196 |
+
gr.State(LeaderboardTab.TRANSLATION),
|
197 |
+
gr.State(), source_language,
|
198 |
+
target_language
|
199 |
+
],
|
200 |
+
outputs=filtered_translation)
|
201 |
|
202 |
gr.Dataframe(headers=["Rank", "Model", "Elo rating"],
|
203 |
datatype=["number", "str", "number"],
|
requirements.txt
CHANGED
@@ -54,6 +54,7 @@ Jinja2==3.1.3
|
|
54 |
jsonschema==4.21.1
|
55 |
jsonschema-specifications==2023.12.1
|
56 |
kiwisolver==1.4.5
|
|
|
57 |
litellm==1.32.7
|
58 |
markdown-it-py==3.0.0
|
59 |
MarkupSafe==2.1.5
|
|
|
54 |
jsonschema==4.21.1
|
55 |
jsonschema-specifications==2023.12.1
|
56 |
kiwisolver==1.4.5
|
57 |
+
lingua-language-detector==2.0.2
|
58 |
litellm==1.32.7
|
59 |
markdown-it-py==3.0.0
|
60 |
MarkupSafe==2.1.5
|