Spaces:
Running
Running
Jellyfish042
committed on
Commit
·
6e57415
1
Parent(s):
8171dbf
update 14B
Browse files- app.py +74 -86
- data/2024-05/14b.xlsx +0 -0
- data/2024-06/14b.xlsx +0 -0
- data/2024-07/14b.xlsx +0 -0
app.py
CHANGED
@@ -17,6 +17,7 @@ load_dotenv()
|
|
17 |
webhook_url = os.environ.get("WEBHOOK_URL")
|
18 |
|
19 |
file_name_list = [
|
|
|
20 |
'9b',
|
21 |
'7b',
|
22 |
'3b',
|
@@ -36,6 +37,7 @@ metric_list = [
|
|
36 |
]
|
37 |
|
38 |
model_size_list = [
|
|
|
39 |
'~9B',
|
40 |
'~7B',
|
41 |
'~3B',
|
@@ -49,25 +51,13 @@ metric_to_sheet = {
|
|
49 |
}
|
50 |
|
51 |
model_size_to_file_name = {
|
|
|
52 |
'~9B': '9b',
|
53 |
'~7B': '7b',
|
54 |
'~3B': '3b',
|
55 |
'~1.5B': '1b5',
|
56 |
}
|
57 |
|
58 |
-
css = """
|
59 |
-
.gr-dataframe table {
|
60 |
-
table-layout: fixed;
|
61 |
-
width: 100%; /* Ensures the table fills its container */
|
62 |
-
}
|
63 |
-
.gr-dataframe th, .gr-dataframe td {
|
64 |
-
width: 100px; /* Set the exact width of each cell */
|
65 |
-
overflow: hidden; /* Ensures the content doesn't overflow */
|
66 |
-
text-overflow: ellipsis; /* Adds an ellipsis (...) if the text overflows */
|
67 |
-
white-space: nowrap; /* Keeps the content on a single line */
|
68 |
-
}
|
69 |
-
"""
|
70 |
-
|
71 |
about_md = """
|
72 |
# Uncheatable Eval
|
73 |
|
@@ -167,61 +157,49 @@ def update_table(period: str,
|
|
167 |
if 'Average (The lower the better)' in combined_data.columns:
|
168 |
relevant_columns = [col for col in visible_columns if
|
169 |
col not in ['Name', 'Parameters Count (B)', 'Average (The lower the better)']]
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
target_color_columns = []
|
214 |
-
if 'Average' in color_columns:
|
215 |
-
target_color_columns.append('Average (lower=better)')
|
216 |
-
if 'Individual Tests' in color_columns:
|
217 |
-
target_color_columns.extend([col for col in filtered_data.columns if
|
218 |
-
col not in ['Name', 'Parameters Count (B)', 'Average (lower=better)']])
|
219 |
-
|
220 |
-
styler = filtered_data.style.format(formatter).map(color_cell, subset=['Parameters Count (B)'])
|
221 |
-
for column in target_color_columns:
|
222 |
-
styler = styler.background_gradient(cmap=cmap, subset=[column], vmin=vmin[column], vmax=vmax[column])
|
223 |
-
|
224 |
-
return styler
|
225 |
else:
|
226 |
return pd.DataFrame()
|
227 |
|
@@ -334,7 +312,8 @@ def create_scaling_plot(all_data, period):
|
|
334 |
y_tick_text = [f"{val:.1f}" for val in y_tick_vals]
|
335 |
|
336 |
fig.update_xaxes(tickvals=np.log(x_tick_vals), ticktext=x_tick_text, title='Params(B)')
|
337 |
-
fig.update_yaxes(tickvals=np.log(y_tick_vals), ticktext=y_tick_text, title='Compression Rate (%)',
|
|
|
338 |
|
339 |
fig.update_layout(
|
340 |
xaxis=dict(showgrid=True, zeroline=False),
|
@@ -346,20 +325,26 @@ def create_scaling_plot(all_data, period):
|
|
346 |
return fig
|
347 |
|
348 |
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
all_data
|
356 |
-
|
357 |
-
|
358 |
-
all_data
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
363 |
|
364 |
initial_fig = create_scaling_plot(all_data, time_list[-1])
|
365 |
|
@@ -385,10 +370,12 @@ css = '''
|
|
385 |
|
386 |
'''
|
387 |
|
|
|
|
|
|
|
388 |
with gr.Blocks(css=css) as demo:
|
389 |
-
gr.HTML(
|
390 |
-
gr.HTML(
|
391 |
-
"<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy fine-tuning and cheating won’t work 🚫; only compute 💻, data 📊, and real innovation 🔥 can prevail!</span></h1>")
|
392 |
with gr.Tabs() as tabs:
|
393 |
with gr.Tab("🏆 Leaderboard"):
|
394 |
with gr.Row():
|
@@ -430,6 +417,7 @@ with gr.Blocks(css=css) as demo:
|
|
430 |
with gr.Tab("📈 Scaling Law"):
|
431 |
period_selector_2 = gr.Dropdown(label="Period", choices=time_list, value=time_list[0])
|
432 |
|
|
|
433 |
def update_plot(period):
|
434 |
new_fig = create_scaling_plot(all_data, period)
|
435 |
return new_fig
|
|
|
17 |
webhook_url = os.environ.get("WEBHOOK_URL")
|
18 |
|
19 |
file_name_list = [
|
20 |
+
'14b',
|
21 |
'9b',
|
22 |
'7b',
|
23 |
'3b',
|
|
|
37 |
]
|
38 |
|
39 |
model_size_list = [
|
40 |
+
'~14B',
|
41 |
'~9B',
|
42 |
'~7B',
|
43 |
'~3B',
|
|
|
51 |
}
|
52 |
|
53 |
model_size_to_file_name = {
|
54 |
+
'~14B': '14b',
|
55 |
'~9B': '9b',
|
56 |
'~7B': '7b',
|
57 |
'~3B': '3b',
|
58 |
'~1.5B': '1b5',
|
59 |
}
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
about_md = """
|
62 |
# Uncheatable Eval
|
63 |
|
|
|
157 |
if 'Average (The lower the better)' in combined_data.columns:
|
158 |
relevant_columns = [col for col in visible_columns if
|
159 |
col not in ['Name', 'Parameters Count (B)', 'Average (The lower the better)']]
|
160 |
+
if len(combined_data) > 0:
|
161 |
+
combined_data['Average (The lower the better)'] = round(combined_data[relevant_columns].mean(axis=1), 3)
|
162 |
+
|
163 |
+
if len(combined_data) > 0:
|
164 |
+
sorted_data = combined_data.sort_values(by=sort_by, ascending=ascending)
|
165 |
+
sorted_data = sorted_data.rename(columns={'Average (The lower the better)': 'Average (lower=better)'})
|
166 |
+
visible_columns = ['Name', 'Parameters Count (B)', 'Average (lower=better)'] + visible_columns
|
167 |
+
filtered_data = sorted_data[visible_columns]
|
168 |
+
|
169 |
+
filtered_data.columns = [col.replace('_', ' ') for col in filtered_data.columns]
|
170 |
+
|
171 |
+
formatter = {col: "{:.3f}" for col in filtered_data.columns if
|
172 |
+
filtered_data[col].dtype in ['float64', 'float32']}
|
173 |
+
|
174 |
+
# color gradient
|
175 |
+
colors = ["#63be7b", "#ffffff", "#f8696b"]
|
176 |
+
cmap = LinearSegmentedColormap.from_list("custom_cmap", colors)
|
177 |
+
vmin = {}
|
178 |
+
vmax = {}
|
179 |
+
for column in filtered_data.columns:
|
180 |
+
if column in ['Name', 'Parameters Count (B)']:
|
181 |
+
continue
|
182 |
+
col_values = filtered_data[column]
|
183 |
+
if len(col_values) > 1:
|
184 |
+
second_largest = col_values.nlargest(2).iloc[-1]
|
185 |
+
vmin[column] = col_values.min()
|
186 |
+
vmax[column] = second_largest
|
187 |
+
|
188 |
+
target_color_columns = []
|
189 |
+
if 'Average' in color_columns:
|
190 |
+
target_color_columns.append('Average (lower=better)')
|
191 |
+
if 'Individual Tests' in color_columns:
|
192 |
+
target_color_columns.extend([col for col in filtered_data.columns if
|
193 |
+
col not in ['Name', 'Parameters Count (B)', 'Average (lower=better)']])
|
194 |
+
|
195 |
+
styler = filtered_data.style.format(formatter)
|
196 |
+
for column in target_color_columns:
|
197 |
+
if column in vmin and column in vmax: # Ensure that the vmin and vmax dicts contain the column
|
198 |
+
styler = styler.background_gradient(cmap=cmap, subset=[column], vmin=vmin[column], vmax=vmax[column])
|
199 |
+
|
200 |
+
return styler
|
201 |
+
else:
|
202 |
+
return pd.DataFrame()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
else:
|
204 |
return pd.DataFrame()
|
205 |
|
|
|
312 |
y_tick_text = [f"{val:.1f}" for val in y_tick_vals]
|
313 |
|
314 |
fig.update_xaxes(tickvals=np.log(x_tick_vals), ticktext=x_tick_text, title='Params(B)')
|
315 |
+
fig.update_yaxes(tickvals=np.log(y_tick_vals), ticktext=y_tick_text, title='Compression Rate (%)',
|
316 |
+
autorange='reversed')
|
317 |
|
318 |
fig.update_layout(
|
319 |
xaxis=dict(showgrid=True, zeroline=False),
|
|
|
325 |
return fig
|
326 |
|
327 |
|
328 |
+
def read_all_data(folder_name):
|
329 |
+
all_data = {}
|
330 |
+
time_list = []
|
331 |
+
for folder in get_folders_matching_format(folder_name):
|
332 |
+
folder_name = os.path.basename(folder)
|
333 |
+
time_list.append(folder_name)
|
334 |
+
if all_data.get(folder) is None:
|
335 |
+
all_data[folder_name] = {}
|
336 |
+
for file_name in file_name_list:
|
337 |
+
if all_data.get(file_name) is None:
|
338 |
+
all_data[folder_name][file_name] = {}
|
339 |
+
for sheet_name in sheet_name_list:
|
340 |
+
final_file_name = os.path.join(folder, file_name)
|
341 |
+
all_data[folder_name][file_name][sheet_name] = rename_columns(
|
342 |
+
pd.read_excel(final_file_name + '.xlsx', sheet_name=sheet_name))
|
343 |
+
|
344 |
+
return all_data, time_list
|
345 |
+
|
346 |
+
|
347 |
+
all_data, time_list = read_all_data('data')
|
348 |
|
349 |
initial_fig = create_scaling_plot(all_data, time_list[-1])
|
350 |
|
|
|
370 |
|
371 |
'''
|
372 |
|
373 |
+
TITLE_HTML = '<h1 style="text-align:center"><span style="font-size:1.3em">🏆 LLM Compression Leaderboard</span></h1>'
|
374 |
+
SUBTITLE_HTML = "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval LLM Compression Leaderboard, where fancy fine-tuning and cheating won’t work 🚫; only compute 💻, data 📊, and real innovation 🔥 can prevail!</span></h1>"
|
375 |
+
|
376 |
with gr.Blocks(css=css) as demo:
|
377 |
+
gr.HTML(TITLE_HTML)
|
378 |
+
gr.HTML(SUBTITLE_HTML)
|
|
|
379 |
with gr.Tabs() as tabs:
|
380 |
with gr.Tab("🏆 Leaderboard"):
|
381 |
with gr.Row():
|
|
|
417 |
with gr.Tab("📈 Scaling Law"):
|
418 |
period_selector_2 = gr.Dropdown(label="Period", choices=time_list, value=time_list[0])
|
419 |
|
420 |
+
|
421 |
def update_plot(period):
|
422 |
new_fig = create_scaling_plot(all_data, period)
|
423 |
return new_fig
|
data/2024-05/14b.xlsx
ADDED
Binary file (10.5 kB). View file
|
|
data/2024-06/14b.xlsx
ADDED
Binary file (10.5 kB). View file
|
|
data/2024-07/14b.xlsx
ADDED
Binary file (11.3 kB). View file
|
|