abhinav-joshi committed on
Commit
3f2777e
·
1 Parent(s): a92fba7

update submission

Browse files
Files changed (3) hide show
  1. app.py +102 -73
  2. src/about.py +3 -27
  3. src/display/utils.py +25 -18
app.py CHANGED
@@ -168,33 +168,33 @@ with demo:
168
  elem_id="column-select",
169
  interactive=True,
170
  )
171
- with gr.Row():
172
- deleted_models_visibility = gr.Checkbox(
173
- value=False, label="Show gated/private/deleted models", interactive=True
174
- )
175
- with gr.Column(min_width=320):
176
- # with gr.Box(elem_id="box-filter"):
177
- filter_columns_type = gr.CheckboxGroup(
178
- label="Model types",
179
- choices=[t.to_str() for t in ModelType],
180
- value=[t.to_str() for t in ModelType],
181
- interactive=True,
182
- elem_id="filter-columns-type",
183
- )
184
- filter_columns_precision = gr.CheckboxGroup(
185
- label="Precision",
186
- choices=[i.value.name for i in Precision],
187
- value=[i.value.name for i in Precision],
188
- interactive=True,
189
- elem_id="filter-columns-precision",
190
- )
191
- filter_columns_size = gr.CheckboxGroup(
192
- label="Model sizes (in billions of parameters)",
193
- choices=list(NUMERIC_INTERVALS.keys()),
194
- value=list(NUMERIC_INTERVALS.keys()),
195
- interactive=True,
196
- elem_id="filter-columns-size",
197
- )
198
 
199
  leaderboard_table = gr.components.Dataframe(
200
  value=leaderboard_df[[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value],
@@ -217,30 +217,30 @@ with demo:
217
  [
218
  hidden_leaderboard_table_for_search,
219
  shown_columns,
220
- filter_columns_type,
221
- filter_columns_precision,
222
- filter_columns_size,
223
- deleted_models_visibility,
224
  search_bar,
225
  ],
226
  leaderboard_table,
227
  )
228
  for selector in [
229
  shown_columns,
230
- filter_columns_type,
231
- filter_columns_precision,
232
- filter_columns_size,
233
- deleted_models_visibility,
234
  ]:
235
  selector.change(
236
  update_table,
237
  [
238
  hidden_leaderboard_table_for_search,
239
  shown_columns,
240
- filter_columns_type,
241
- filter_columns_precision,
242
- filter_columns_size,
243
- deleted_models_visibility,
244
  search_bar,
245
  ],
246
  leaderboard_table,
@@ -290,53 +290,82 @@ with demo:
290
  datatype=EVAL_TYPES,
291
  row_count=5,
292
  )
293
- with gr.Row():
294
- gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
295
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  with gr.Row():
297
  with gr.Column():
298
- model_name_textbox = gr.Textbox(label="Model name")
299
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
300
- model_type = gr.Dropdown(
301
- choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
302
- label="Model type",
303
- multiselect=False,
304
- value=None,
305
- interactive=True,
306
- )
307
-
308
  with gr.Column():
309
- precision = gr.Dropdown(
310
- choices=[i.value.name for i in Precision if i != Precision.Unknown],
311
- label="Precision",
312
- multiselect=False,
313
- value="float16",
314
- interactive=True,
315
- )
316
- weight_type = gr.Dropdown(
317
- choices=[i.value.name for i in WeightType],
318
- label="Weights type",
319
- multiselect=False,
320
- value="Original",
321
- interactive=True,
322
- )
323
- base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
324
 
325
  submit_button = gr.Button("Submit Eval")
326
  submission_result = gr.Markdown()
327
  submit_button.click(
328
  add_new_eval,
329
  [
330
- model_name_textbox,
331
- base_model_name_textbox,
332
- revision_name_textbox,
333
- precision,
334
- weight_type,
335
- model_type,
 
336
  ],
337
  submission_result,
338
  )
339
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
  with gr.Row():
341
  with gr.Accordion("📙 Citation", open=False):
342
  citation_button = gr.Textbox(
 
168
  elem_id="column-select",
169
  interactive=True,
170
  )
171
+ # with gr.Row():
172
+ # deleted_models_visibility = gr.Checkbox(
173
+ # value=False, label="Show gated/private/deleted models", interactive=True
174
+ # )
175
+ # with gr.Column(min_width=320):
176
+ # # with gr.Box(elem_id="box-filter"):
177
+ # filter_columns_type = gr.CheckboxGroup(
178
+ # label="Model types",
179
+ # choices=[t.to_str() for t in ModelType],
180
+ # value=[t.to_str() for t in ModelType],
181
+ # interactive=True,
182
+ # elem_id="filter-columns-type",
183
+ # )
184
+ # filter_columns_precision = gr.CheckboxGroup(
185
+ # label="Precision",
186
+ # choices=[i.value.name for i in Precision],
187
+ # value=[i.value.name for i in Precision],
188
+ # interactive=True,
189
+ # elem_id="filter-columns-precision",
190
+ # )
191
+ # filter_columns_size = gr.CheckboxGroup(
192
+ # label="Model sizes (in billions of parameters)",
193
+ # choices=list(NUMERIC_INTERVALS.keys()),
194
+ # value=list(NUMERIC_INTERVALS.keys()),
195
+ # interactive=True,
196
+ # elem_id="filter-columns-size",
197
+ # )
198
 
199
  leaderboard_table = gr.components.Dataframe(
200
  value=leaderboard_df[[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value],
 
217
  [
218
  hidden_leaderboard_table_for_search,
219
  shown_columns,
220
+ # filter_columns_type,
221
+ # filter_columns_precision,
222
+ # filter_columns_size,
223
+ # deleted_models_visibility,
224
  search_bar,
225
  ],
226
  leaderboard_table,
227
  )
228
  for selector in [
229
  shown_columns,
230
+ # filter_columns_type,
231
+ # filter_columns_precision,
232
+ # filter_columns_size,
233
+ # deleted_models_visibility,
234
  ]:
235
  selector.change(
236
  update_table,
237
  [
238
  hidden_leaderboard_table_for_search,
239
  shown_columns,
240
+ # filter_columns_type,
241
+ # filter_columns_precision,
242
+ # filter_columns_size,
243
+ # deleted_models_visibility,
244
  search_bar,
245
  ],
246
  leaderboard_table,
 
290
  datatype=EVAL_TYPES,
291
  row_count=5,
292
  )
293
+ # with gr.Row():
294
+ # gr.Markdown("# ✉️✨ Submit your Results here!", elem_classes="markdown-text")
295
+
296
+ # with gr.Row():
297
+ # with gr.Column():
298
+ # model_name_textbox = gr.Textbox(label="Model name")
299
+ # revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
300
+ # model_type = gr.Dropdown(
301
+ # choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
302
+ # label="Model type",
303
+ # multiselect=False,
304
+ # value=None,
305
+ # interactive=True,
306
+ # )
307
+
308
+ # with gr.Column():
309
+ # precision = gr.Dropdown(
310
+ # choices=[i.value.name for i in Precision if i != Precision.Unknown],
311
+ # label="Precision",
312
+ # multiselect=False,
313
+ # value="float16",
314
+ # interactive=True,
315
+ # )
316
+ # weight_type = gr.Dropdown(
317
+ # choices=[i.value.name for i in WeightType],
318
+ # label="Weights type",
319
+ # multiselect=False,
320
+ # value="Original",
321
+ # interactive=True,
322
+ # )
323
+ # base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
324
+
325
+ with gr.Accordion("Submit a new model for evaluation"):
326
  with gr.Row():
327
  with gr.Column():
328
+ method_name_textbox = gr.Textbox(label="Method name")
329
+ # llama, phi
330
+ model_family_radio = gr.Radio(["llama", "phi"], value="llama", label="Model family")
331
+ forget_rate_radio = gr.Radio(["1%", "5%", "10%"], value="10%", label="Forget rate")
332
+ url_textbox = gr.Textbox(label="Url to model information")
 
 
 
 
 
333
  with gr.Column():
334
+ organisation = gr.Textbox(label="Organisation")
335
+ mail = gr.Textbox(label="Contact email")
336
+ file_output = gr.File()
 
 
 
 
 
 
 
 
 
 
 
 
337
 
338
  submit_button = gr.Button("Submit Eval")
339
  submission_result = gr.Markdown()
340
  submit_button.click(
341
  add_new_eval,
342
  [
343
+ method_name_textbox,
344
+ model_family_radio,
345
+ forget_rate_radio,
346
+ url_textbox,
347
+ file_output,
348
+ organisation,
349
+ mail,
350
  ],
351
  submission_result,
352
  )
353
 
354
+ # submit_button = gr.Button("Submit Eval")
355
+ # submission_result = gr.Markdown()
356
+ # submit_button.click(
357
+ # add_new_eval,
358
+ # [
359
+ # model_name_textbox,
360
+ # base_model_name_textbox,
361
+ # revision_name_textbox,
362
+ # precision,
363
+ # weight_type,
364
+ # model_type,
365
+ # ],
366
+ # submission_result,
367
+ # )
368
+
369
  with gr.Row():
370
  with gr.Accordion("📙 Citation", open=False):
371
  citation_button = gr.Textbox(
src/about.py CHANGED
@@ -30,7 +30,7 @@ NUM_FEWSHOT = 0 # Change with your few shot
30
 
31
 
32
  # Your leaderboard name
33
- TITLE = """<h1 align="center" id="space-title">IL-TUR leaderboard</h1>"""
34
 
35
  # What does your leaderboard evaluate?
36
  INTRODUCTION_TEXT = """
@@ -47,33 +47,9 @@ To reproduce our results, here is the commands you can run:
47
  """
48
 
49
  EVALUATION_QUEUE_TEXT = """
50
- ## Some good practices before submitting a model
51
 
52
- ### 1) Make sure you can load your model and tokenizer using AutoClasses:
53
- ```python
54
- from transformers import AutoConfig, AutoModel, AutoTokenizer
55
- config = AutoConfig.from_pretrained("your model name", revision=revision)
56
- model = AutoModel.from_pretrained("your model name", revision=revision)
57
- tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
58
- ```
59
- If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded.
60
-
61
- Note: make sure your model is public!
62
- Note: if your model needs `trust_remote_code=True`, we do not support this option yet, but we are working on adding it. Stay posted!
63
-
64
- ### 2) Convert your model weights to [safetensors](https://huggingface.co/docs/safetensors/index)
65
- It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`!
66
-
67
- ### 3) Make sure your model has an open license!
68
- This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗
69
-
70
- ### 4) Fill up your model card
71
- When we add extra information about models to the leaderboard, it will be automatically taken from the model card
72
-
73
- ## In case of model failure
74
- If your model is displayed in the `FAILED` category, its execution stopped.
75
- Make sure you have followed the above steps first.
76
- If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task).
77
  """
78
 
79
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 
30
 
31
 
32
  # Your leaderboard name
33
+ TITLE = """<h1 align="center" id="space-title">IL-TUR Leaderboard</h1>"""
34
 
35
  # What does your leaderboard evaluate?
36
  INTRODUCTION_TEXT = """
 
47
  """
48
 
49
  EVALUATION_QUEUE_TEXT = """
50
+ We encourage submissions for the IL-TUR leaderboard. The leaderboard is open to all researchers and practitioners.
51
 
52
+ Every task has its own leaderboard, and researchers can submit their results for any task. We also encourage submissions for multiple tasks.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  """
54
 
55
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
src/display/utils.py CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
5
 
6
  from src.about import Tasks
7
 
 
8
  def fields(raw_class):
9
  return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
10
 
@@ -20,29 +21,31 @@ class ColumnContent:
20
  hidden: bool = False
21
  never_hidden: bool = False
22
 
 
23
  ## Leaderboard columns
24
  auto_eval_column_dict = []
25
  # Init
26
  auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
27
  auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
28
- #Scores
29
  auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
30
  for task in Tasks:
31
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
32
- # Model information
33
- auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
34
- auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
35
- auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
36
- auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
37
- auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
38
- auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
39
- auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
40
- auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
41
- auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
42
 
43
  # We use make_dataclass to dynamically fill the scores from Tasks
44
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
45
 
 
46
  ## For the queue columns in the submission tab
47
  @dataclass(frozen=True)
48
  class EvalQueueColumn: # Queue column
@@ -53,12 +56,13 @@ class EvalQueueColumn: # Queue column
53
  weight_type = ColumnContent("weight_type", "str", "Original")
54
  status = ColumnContent("status", "str", True)
55
 
 
56
  ## All the model information that we might need
57
  @dataclass
58
  class ModelDetails:
59
  name: str
60
  display_name: str = ""
61
- symbol: str = "" # emoji
62
 
63
 
64
  class ModelType(Enum):
@@ -83,18 +87,20 @@ class ModelType(Enum):
83
  return ModelType.IFT
84
  return ModelType.Unknown
85
 
 
86
  class WeightType(Enum):
87
  Adapter = ModelDetails("Adapter")
88
  Original = ModelDetails("Original")
89
  Delta = ModelDetails("Delta")
90
 
 
91
  class Precision(Enum):
92
  float16 = ModelDetails("float16")
93
  bfloat16 = ModelDetails("bfloat16")
94
  float32 = ModelDetails("float32")
95
- #qt_8bit = ModelDetails("8bit")
96
- #qt_4bit = ModelDetails("4bit")
97
- #qt_GPTQ = ModelDetails("GPTQ")
98
  Unknown = ModelDetails("?")
99
 
100
  def from_str(precision):
@@ -104,14 +110,15 @@ class Precision(Enum):
104
  return Precision.bfloat16
105
  if precision in ["float32"]:
106
  return Precision.float32
107
- #if precision in ["8bit"]:
108
  # return Precision.qt_8bit
109
- #if precision in ["4bit"]:
110
  # return Precision.qt_4bit
111
- #if precision in ["GPTQ", "None"]:
112
  # return Precision.qt_GPTQ
113
  return Precision.Unknown
114
 
 
115
  # Column selection
116
  COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
117
  TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
 
5
 
6
  from src.about import Tasks
7
 
8
+
9
  def fields(raw_class):
10
  return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
11
 
 
21
  hidden: bool = False
22
  never_hidden: bool = False
23
 
24
+
25
  ## Leaderboard columns
26
  auto_eval_column_dict = []
27
  # Init
28
  auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
29
  auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
30
+ # Scores
31
  auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
32
  for task in Tasks:
33
  auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
34
+ # # Model information
35
+ # auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
36
+ # auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
37
+ # auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
38
+ # auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
39
+ # auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
40
+ # auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
41
+ # auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
42
+ # auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
43
+ # auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
44
 
45
  # We use make_dataclass to dynamically fill the scores from Tasks
46
  AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
47
 
48
+
49
  ## For the queue columns in the submission tab
50
  @dataclass(frozen=True)
51
  class EvalQueueColumn: # Queue column
 
56
  weight_type = ColumnContent("weight_type", "str", "Original")
57
  status = ColumnContent("status", "str", True)
58
 
59
+
60
  ## All the model information that we might need
61
  @dataclass
62
  class ModelDetails:
63
  name: str
64
  display_name: str = ""
65
+ symbol: str = "" # emoji
66
 
67
 
68
  class ModelType(Enum):
 
87
  return ModelType.IFT
88
  return ModelType.Unknown
89
 
90
+
91
  class WeightType(Enum):
92
  Adapter = ModelDetails("Adapter")
93
  Original = ModelDetails("Original")
94
  Delta = ModelDetails("Delta")
95
 
96
+
97
  class Precision(Enum):
98
  float16 = ModelDetails("float16")
99
  bfloat16 = ModelDetails("bfloat16")
100
  float32 = ModelDetails("float32")
101
+ # qt_8bit = ModelDetails("8bit")
102
+ # qt_4bit = ModelDetails("4bit")
103
+ # qt_GPTQ = ModelDetails("GPTQ")
104
  Unknown = ModelDetails("?")
105
 
106
  def from_str(precision):
 
110
  return Precision.bfloat16
111
  if precision in ["float32"]:
112
  return Precision.float32
113
+ # if precision in ["8bit"]:
114
  # return Precision.qt_8bit
115
+ # if precision in ["4bit"]:
116
  # return Precision.qt_4bit
117
+ # if precision in ["GPTQ", "None"]:
118
  # return Precision.qt_GPTQ
119
  return Precision.Unknown
120
 
121
+
122
  # Column selection
123
  COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
124
  TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]