albertvillanova HF staff committed on
Commit
7e32ac7
·
verified ·
1 Parent(s): fae0e19

Refactor glob to use the cache of HfFileSystem

Browse files
Files changed (3) hide show
  1. src/details.py +10 -9
  2. src/hub.py +7 -1
  3. src/results.py +3 -5
src/details.py CHANGED
@@ -2,10 +2,9 @@ import asyncio
2
 
3
  import gradio as gr
4
  import pandas as pd
5
- from huggingface_hub import HfFileSystem
6
 
7
  import src.constants as constants
8
- from src.hub import load_jsonlines_file
9
 
10
 
11
  def update_task_description_component(task):
@@ -40,16 +39,18 @@ def update_load_details_component(model_id_1, model_id_2, subtask):
40
  return gr.Button("Load Details", interactive=False)
41
 
42
 
 
 
 
 
 
 
 
 
43
  async def load_details_dataframe(model_id, subtask):
44
- fs = HfFileSystem()
45
  if not model_id or not subtask:
46
  return
47
- model_name_sanitized = model_id.replace("/", "__")
48
- paths = fs.glob(
49
- f"{constants.DETAILS_DATASET_ID}/**/{constants.DETAILS_FILENAME}".format(
50
- model_name_sanitized=model_name_sanitized, subtask=subtask
51
- )
52
- )
53
  if not paths:
54
  return
55
  path = max(paths)
 
2
 
3
  import gradio as gr
4
  import pandas as pd
 
5
 
6
  import src.constants as constants
7
+ from src.hub import glob, load_jsonlines_file
8
 
9
 
10
  def update_task_description_component(task):
 
39
  return gr.Button("Load Details", interactive=False)
40
 
41
 
42
+ def fetch_details_paths(model_id, subtask):
43
+ model_name_sanitized = model_id.replace("/", "__")
44
+ dataset_id = constants.DETAILS_DATASET_ID.format(model_name_sanitized=model_name_sanitized)
45
+ filename = constants.DETAILS_FILENAME.format(subtask=subtask)
46
+ path = f"{dataset_id}/**/{filename}"
47
+ return glob(path)
48
+
49
+
50
  async def load_details_dataframe(model_id, subtask):
 
51
  if not model_id or not subtask:
52
  return
53
+ paths = fetch_details_paths(model_id, subtask)
 
 
 
 
 
54
  if not paths:
55
  return
56
  path = max(paths)
src/hub.py CHANGED
@@ -2,11 +2,17 @@ import io
2
  import json
3
 
4
  import httpx
5
- from huggingface_hub import hf_hub_url
6
  from huggingface_hub.utils import build_hf_headers
7
 
8
 
9
  client = httpx.AsyncClient(follow_redirects=True)
 
 
 
 
 
 
10
 
11
 
12
  async def load_json_file(path):
 
2
  import json
3
 
4
  import httpx
5
+ from huggingface_hub import HfFileSystem, hf_hub_url
6
  from huggingface_hub.utils import build_hf_headers
7
 
8
 
9
  client = httpx.AsyncClient(follow_redirects=True)
10
+ fs = HfFileSystem()
11
+
12
+
13
+ def glob(path):
14
+ paths = fs.glob(path)
15
+ return paths
16
 
17
 
18
  async def load_json_file(path):
src/results.py CHANGED
@@ -2,16 +2,14 @@ import asyncio
2
 
3
  import gradio as gr
4
  import pandas as pd
5
- from huggingface_hub import HfFileSystem
6
 
7
  import src.constants as constants
8
- from src.hub import load_json_file
9
 
10
 
11
  def fetch_result_paths():
12
- fs = HfFileSystem()
13
- paths = fs.glob(f"{constants.RESULTS_DATASET_ID}/**/**/*.json")
14
- return paths
15
 
16
 
17
  def sort_result_paths_per_model(paths):
 
2
 
3
  import gradio as gr
4
  import pandas as pd
 
5
 
6
  import src.constants as constants
7
+ from src.hub import glob, load_json_file
8
 
9
 
10
  def fetch_result_paths():
11
+ path = f"{constants.RESULTS_DATASET_ID}/**/**/*.json"
12
+ return glob(path)
 
13
 
14
 
15
  def sort_result_paths_per_model(paths):