lewtun HF staff committed on
Commit
af2acd4
·
1 Parent(s): eb4376f

Add eval caching

Browse files
Files changed (3) hide show
  1. app.py +20 -0
  2. evaluation.py +26 -0
  3. utils.py +1 -1
app.py CHANGED
@@ -8,6 +8,8 @@ from datasets import get_dataset_config_names
8
  from dotenv import load_dotenv
9
  from huggingface_hub import list_datasets
10
 
 
 
11
  from utils import (get_compatible_models, get_key, get_metadata, http_get,
12
  http_post)
13
 
@@ -244,6 +246,24 @@ with st.form(key="form"):
244
 
245
  selected_models = st.multiselect("Select the models you wish to evaluate", compatible_models)
246
  print("Selected models:", selected_models)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  submit_button = st.form_submit_button("Make submission")
248
 
249
  if submit_button:
 
8
  from dotenv import load_dotenv
9
  from huggingface_hub import list_datasets
10
 
11
+ from evaluation import (EvaluationInfo, compute_evaluation_id,
12
+ get_evaluation_ids)
13
  from utils import (get_compatible_models, get_key, get_metadata, http_get,
14
  http_post)
15
 
 
246
 
247
  selected_models = st.multiselect("Select the models you wish to evaluate", compatible_models)
248
  print("Selected models:", selected_models)
249
+
250
# Hashes of the evaluations that have already been run, as reported by
# get_evaluation_ids().
evaluation_ids = get_evaluation_ids()
# Set copy for O(1) membership tests; `evaluation_ids` itself is left as-is.
known_evaluation_ids = set(evaluation_ids)

# Collect the models that still need evaluating in a new list. The previous
# version called `selected_models.pop(idx)` while enumerating the same list,
# which shifts the remaining items left and makes the loop skip the model
# that immediately follows every removed one.
models_to_evaluate = []
for model in selected_models:
    eval_info = EvaluationInfo(
        task=selected_task,
        model=model,
        dataset_name=selected_dataset,
        dataset_config=selected_config,
        dataset_split=selected_split,
    )
    candidate_id = hash(eval_info)
    if candidate_id in known_evaluation_ids:
        st.info(f"Model {model} has already been evaluated on this configuration. Skipping ...")
    else:
        models_to_evaluate.append(model)
selected_models = models_to_evaluate

print("Selected models:", selected_models)
266
+
267
  submit_button = st.form_submit_button("Make submission")
268
 
269
  if submit_button:
evaluation.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+
3
+ from huggingface_hub import DatasetFilter, HfApi
4
+ from huggingface_hub.hf_api import DatasetInfo
5
+
6
+
7
# `eq` defaults to True for dataclasses; combined with `frozen=True` this makes
# instances hashable, and the hash is derived from the field values.
@dataclass(frozen=True)
class EvaluationInfo:
    """Immutable, hashable record identifying a single evaluation.

    Two instances with equal field values compare equal and share the same
    hash, so `hash(EvaluationInfo(...))` can serve as an evaluation ID
    within a single process.
    """

    # Evaluation task identifier.
    task: str
    # Identifier of the evaluated model.
    model: str
    # Dataset name, configuration and split the evaluation refers to.
    dataset_name: str
    dataset_config: str
    dataset_split: str
14
+
15
+
16
def compute_evaluation_id(dataset_info: DatasetInfo) -> int:
    """Return the hash that identifies the evaluation stored in a dataset card.

    The `"eval_info"` entry of `dataset_info.cardData` is turned into an
    `EvaluationInfo` (ignoring the `"col_mapping"` key, which is not one of
    its fields) and hashed.

    Args:
        dataset_info: Hub dataset description whose `cardData` holds an
            `"eval_info"` mapping.

    Returns:
        `hash()` of the resulting `EvaluationInfo`.

    Raises:
        KeyError: if `cardData` has no `"eval_info"` entry.
        TypeError: if `cardData` is `None`, or if `"eval_info"` contains
            keys that `EvaluationInfo` does not accept.
    """
    # Build a filtered copy instead of popping "col_mapping" from the card's
    # own dict, so the caller's `dataset_info` is not modified as a side effect.
    eval_fields = {
        key: value
        for key, value in dataset_info.cardData["eval_info"].items()
        if key != "col_mapping"
    }
    return hash(EvaluationInfo(**eval_fields))
21
+
22
+
23
def get_evaluation_ids():
    """Return the evaluation IDs of the datasets published by `autoevaluate`.

    Lists the datasets authored by `autoevaluate` on the Hub (with full
    metadata) and computes one ID per dataset via `compute_evaluation_id`.
    Datasets that carry no card metadata, or whose metadata has no
    `"eval_info"` entry, are skipped.

    Returns:
        A list of integer evaluation IDs.
    """
    filt = DatasetFilter(author="autoevaluate")
    evaluation_datasets = HfApi().list_datasets(filter=filt, full=True)

    evaluation_ids = []
    for dset in evaluation_datasets:
        # A dataset without evaluation metadata would make
        # compute_evaluation_id raise and abort the whole listing, so skip it.
        # NOTE(review): assumes cardData is a dict (or None) - confirm against
        # the installed huggingface_hub version.
        card_data = getattr(dset, "cardData", None)
        if not card_data or "eval_info" not in card_data:
            continue
        evaluation_ids.append(compute_evaluation_id(dset))
    return evaluation_ids
utils.py CHANGED
@@ -1,7 +1,7 @@
1
  from typing import Dict, Union
2
 
3
  import requests
4
- from huggingface_hub import DatasetFilter, HfApi, ModelFilter
5
 
6
  AUTOTRAIN_TASK_TO_HUB_TASK = {
7
  "binary_classification": "text-classification",
 
1
  from typing import Dict, Union
2
 
3
  import requests
4
+ from huggingface_hub import HfApi, ModelFilter
5
 
6
  AUTOTRAIN_TASK_TO_HUB_TASK = {
7
  "binary_classification": "text-classification",