Jellyfish042 committed on
Commit
cc8a66b
·
1 Parent(s): 49c2344
Files changed (1) hide show
  1. app.py +70 -17
app.py CHANGED
@@ -6,6 +6,7 @@ import requests
6
  import huggingface_hub
7
  from huggingface_hub.utils._errors import EntryNotFoundError, RepositoryNotFoundError
8
  from dotenv import load_dotenv
 
9
 
10
  load_dotenv()
11
  webhook_url = os.environ.get("WEBHOOK_URL")
@@ -119,21 +120,25 @@ def get_folders_matching_format(directory):
119
 
120
 
121
  def get_unique_column_names(all_data):
122
- column_names = set()
123
 
124
  for folder_name, files in all_data.items():
125
  for file_name, sheets in files.items():
126
  for sheet_name, dataframe in sheets.items():
127
- column_names.update(dataframe.columns)
128
- column_names.remove('Name')
129
- column_names.remove('Average (The lower the better)')
130
- column_names.remove('Parameters Count (B)')
131
 
132
- return list(column_names)
133
 
134
 
135
- def update_table(period: str, models: list, metric: str, visible_columns: list,
136
- sort_by: str = 'Average (The lower the better)', ascending: bool = True):
 
 
 
 
 
137
  target_data = all_data[period]
138
  target_metric = metric_to_sheet[metric]
139
 
@@ -142,15 +147,38 @@ def update_table(period: str, models: list, metric: str, visible_columns: list,
142
  combined_data = pd.concat([target_data[model][target_metric] for model in target_model_size], axis=0)
143
  combined_data['Name'] = combined_data['Name'].apply(lambda x: x.replace('.pth', ''))
144
 
 
 
145
  if 'Average (The lower the better)' in combined_data.columns:
146
  relevant_columns = [col for col in visible_columns if
147
  col not in ['Name', 'Parameters Count (B)', 'Average (The lower the better)']]
148
  combined_data['Average (The lower the better)'] = round(combined_data[relevant_columns].mean(axis=1), 3)
149
 
150
  sorted_data = combined_data.sort_values(by=sort_by, ascending=ascending)
151
- visible_columns = ['Name', 'Parameters Count (B)', 'Average (The lower the better)'] + visible_columns
 
152
  filtered_data = sorted_data[visible_columns]
153
- return filtered_data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  else:
155
  return pd.DataFrame()
156
 
@@ -210,10 +238,25 @@ initial_period = time_list[-1]
210
  initial_models = model_size_list[:1]
211
  initial_metric = metric_list[0]
212
  initial_columns = get_unique_column_names(all_data)
 
213
 
214
- initial_data = update_table(initial_period, initial_models, initial_metric, initial_columns)
215
 
216
- with gr.Blocks(css=".gradio-container{max-width:95%!important} .tab-buttons button{font-size:1.3em}") as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  gr.HTML('<h1 style="text-align:center"><span style="font-size:1.3em">πŸ† Uncheatable Eval Leaderboard</span></h1>')
218
  gr.HTML(
219
  "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval, where fancy fine-tuning and cheating won’t work 🚫; only compute πŸ’», data πŸ“Š, and real innovation πŸ”₯ can prevail!</span></h1>")
@@ -225,20 +268,30 @@ with gr.Blocks(css=".gradio-container{max-width:95%!important} .tab-buttons butt
225
  model_selector = gr.CheckboxGroup(label="Model", choices=model_size_list, value=model_size_list[0])
226
  metric_selector = gr.Dropdown(label="Metric", choices=metric_list, value=metric_list[0])
227
  with gr.Column():
 
 
 
228
  colfilter = gr.CheckboxGroup(label="Data Source",
229
  choices=get_unique_column_names(all_data),
230
  value=get_unique_column_names(all_data))
231
 
232
- table = gr.Dataframe(initial_data)
233
 
234
- period_selector.change(update_table, inputs=[period_selector, model_selector, metric_selector, colfilter],
 
235
  outputs=table)
236
- model_selector.change(update_table, inputs=[period_selector, model_selector, metric_selector, colfilter],
 
237
  outputs=table)
238
- metric_selector.change(update_table, inputs=[period_selector, model_selector, metric_selector, colfilter],
 
239
  outputs=table)
240
- colfilter.change(update_table, inputs=[period_selector, model_selector, metric_selector, colfilter],
 
241
  outputs=table)
 
 
 
242
 
243
  with gr.Tab("🌍 MultiLang"):
244
  gr.Markdown("## Coming soon...")
 
6
  import huggingface_hub
7
  from huggingface_hub.utils._errors import EntryNotFoundError, RepositoryNotFoundError
8
  from dotenv import load_dotenv
9
+ from matplotlib.colors import LinearSegmentedColormap
10
 
11
  load_dotenv()
12
  webhook_url = os.environ.get("WEBHOOK_URL")
 
120
 
121
 
122
  def get_unique_column_names(all_data):
123
+ column_names = {}
124
 
125
  for folder_name, files in all_data.items():
126
  for file_name, sheets in files.items():
127
  for sheet_name, dataframe in sheets.items():
128
+ for column in dataframe.columns:
129
+ if column not in ['Name', 'Average (The lower the better)', 'Parameters Count (B)']:
130
+ column_names[column] = None
 
131
 
132
+ return list(column_names.keys())
133
 
134
 
135
+ def update_table(period: str,
136
+ models: list,
137
+ metric: str,
138
+ visible_columns: list,
139
+ color_columns: list,
140
+ sort_by: str = 'Average (The lower the better)',
141
+ ascending: bool = True):
142
  target_data = all_data[period]
143
  target_metric = metric_to_sheet[metric]
144
 
 
147
  combined_data = pd.concat([target_data[model][target_metric] for model in target_model_size], axis=0)
148
  combined_data['Name'] = combined_data['Name'].apply(lambda x: x.replace('.pth', ''))
149
 
150
+ combined_data.reset_index(drop=True, inplace=True)
151
+
152
  if 'Average (The lower the better)' in combined_data.columns:
153
  relevant_columns = [col for col in visible_columns if
154
  col not in ['Name', 'Parameters Count (B)', 'Average (The lower the better)']]
155
  combined_data['Average (The lower the better)'] = round(combined_data[relevant_columns].mean(axis=1), 3)
156
 
157
  sorted_data = combined_data.sort_values(by=sort_by, ascending=ascending)
158
+ sorted_data = sorted_data.rename(columns={'Average (The lower the better)': 'Average (lower=better)'})
159
+ visible_columns = ['Name', 'Parameters Count (B)', 'Average (lower=better)'] + visible_columns
160
  filtered_data = sorted_data[visible_columns]
161
+
162
+ filtered_data.columns = [col.replace('_', ' ') for col in filtered_data.columns]
163
+
164
+ formatter = {col: "{:.3f}" for col in filtered_data.columns if
165
+ filtered_data[col].dtype in ['float64', 'float32']}
166
+
167
+ # color gradient
168
+ colors = ["#63be7b", "#ffffff", "#f8696b"]
169
+ cmap = LinearSegmentedColormap.from_list("custom_cmap", colors)
170
+ target_color_columns = []
171
+ if 'Average' in color_columns:
172
+ target_color_columns.append('Average (lower=better)')
173
+ if 'Individual Tests' in color_columns:
174
+ target_color_columns.extend([col for col in filtered_data.columns if col not in ['Name', 'Parameters Count (B)', 'Average (lower=better)']])
175
+
176
+ styler = filtered_data.style.format(formatter).background_gradient(
177
+ cmap=cmap,
178
+ subset=target_color_columns
179
+ )
180
+
181
+ return styler
182
  else:
183
  return pd.DataFrame()
184
 
 
238
  initial_models = model_size_list[:1]
239
  initial_metric = metric_list[0]
240
  initial_columns = get_unique_column_names(all_data)
241
+ initial_colors = ['Average']
242
 
243
+ initial_data = update_table(initial_period, initial_models, initial_metric, initial_columns, initial_colors)
244
 
245
+ css = '''
246
+ .gradio-container {
247
+ max-width: 95% !important;
248
+ }
249
+ .tab-buttons button {
250
+ font-size: 1.3em;
251
+ }
252
+ .gr-dataframe th {
253
+ white-space: normal;
254
+ word-break: break-word;
255
+ }
256
+
257
+ '''
258
+
259
+ with gr.Blocks(css=css) as demo:
260
  gr.HTML('<h1 style="text-align:center"><span style="font-size:1.3em">πŸ† Uncheatable Eval Leaderboard</span></h1>')
261
  gr.HTML(
262
  "<h1 style='text-align:center'><span style='font-size:0.8em'>Welcome to Uncheatable Eval, where fancy fine-tuning and cheating won’t work 🚫; only compute πŸ’», data πŸ“Š, and real innovation πŸ”₯ can prevail!</span></h1>")
 
268
  model_selector = gr.CheckboxGroup(label="Model", choices=model_size_list, value=model_size_list[0])
269
  metric_selector = gr.Dropdown(label="Metric", choices=metric_list, value=metric_list[0])
270
  with gr.Column():
271
+ color_selector = gr.CheckboxGroup(label="Colored Columns",
272
+ choices=['Average', 'Individual Tests'],
273
+ value=['Average'])
274
  colfilter = gr.CheckboxGroup(label="Data Source",
275
  choices=get_unique_column_names(all_data),
276
  value=get_unique_column_names(all_data))
277
 
278
+ table = gr.Dataframe(initial_data, column_widths=[110, 35, 35, 35, 35, 35, 35, 35, 35, 35], wrap=True)
279
 
280
+ period_selector.change(update_table,
281
+ inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector],
282
  outputs=table)
283
+ model_selector.change(update_table,
284
+ inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector],
285
  outputs=table)
286
+ metric_selector.change(update_table,
287
+ inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector],
288
  outputs=table)
289
+ colfilter.change(update_table,
290
+ inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector],
291
  outputs=table)
292
+ color_selector.change(update_table,
293
+ inputs=[period_selector, model_selector, metric_selector, colfilter, color_selector],
294
+ outputs=table)
295
 
296
  with gr.Tab("🌍 MultiLang"):
297
  gr.Markdown("## Coming soon...")