Commit f58e22b · committed by hanhainebula
Parent: 40aae8d

fix issue: auto-loading submit infos

Files changed:
- app.py (+19 -6)
- src/backend.py (+28 -6)
- src/envs.py (+4 -1)
app.py
CHANGED

```diff
@@ -7,9 +7,9 @@ import multiprocessing
 
 from src.backend import pull_search_results
 from src.envs import (
-    API,
-    HF_CACHE_DIR,
-    HF_SEARCH_RESULTS_REPO_DIR, HF_EVAL_RESULTS_REPO_DIR,
+    API, START_COMMIT_ID,
+    HF_CACHE_DIR, SUBMIT_INFOS_DIR, SUBMIT_INFOS_FILE_NAME,
+    HF_SEARCH_RESULTS_REPO_DIR, HF_EVAL_RESULTS_REPO_DIR, SUBMIT_INFOS_REPO,
     UNZIP_TARGET_DIR,
     TIME_DURATION,
     EVAL_K_VALUES,
@@ -20,7 +20,9 @@ from src.css_html_js import custom_css
 logger = logging.getLogger(__name__)
 logging.basicConfig(
     level=logging.WARNING,
-
+    datefmt='%Y-%m-%d %H:%M:%S',
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    force=True
 )
 
 
@@ -29,8 +31,17 @@ logging.basicConfig(
 
 
 def load_submit_infos_df():
-
-
+    # Pull the submit infos
+    API.snapshot_download(
+        repo_id=SUBMIT_INFOS_REPO,
+        repo_type="dataset",
+        local_dir=SUBMIT_INFOS_DIR,
+        etag_timeout=30
+    )
+    submit_infos_save_path = os.path.join(SUBMIT_INFOS_DIR, SUBMIT_INFOS_FILE_NAME)
+
+    if os.path.exists(submit_infos_save_path):
+        with open(submit_infos_save_path, 'r', encoding='utf-8') as f:
             submit_infos = json.load(f)
     else:
         submit_infos = []
@@ -65,6 +76,8 @@ if __name__ == "__main__":
         HF_SEARCH_RESULTS_REPO_DIR,
         HF_EVAL_RESULTS_REPO_DIR,
         UNZIP_TARGET_DIR,
+        SUBMIT_INFOS_DIR,
+        SUBMIT_INFOS_FILE_NAME,
         EVAL_K_VALUES,
         HF_CACHE_DIR,
         TIME_DURATION,
```
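Taken together, the new `load_submit_infos_df` body is a pull-then-read pattern: download the latest snapshot of the submit-infos dataset repo, then read the JSON file if it exists and fall back to an empty list otherwise. Below is a minimal, self-contained sketch of that pattern using `huggingface_hub` directly; the repo id, directory, and file name are placeholders, not the Space's actual configuration:

```python
import json
import os

from huggingface_hub import HfApi

api = HfApi(token=os.getenv("HF_TOKEN"))


def load_submit_infos(repo_id: str, local_dir: str, file_name: str) -> list:
    # Pull the latest snapshot of the dataset repo that stores submit infos.
    api.snapshot_download(
        repo_id=repo_id,
        repo_type="dataset",
        local_dir=local_dir,
        etag_timeout=30,
    )
    save_path = os.path.join(local_dir, file_name)
    # Fall back to an empty list when the file has not been published yet.
    if not os.path.exists(save_path):
        return []
    with open(save_path, "r", encoding="utf-8") as f:
        return json.load(f)


# Usage (placeholder repo id):
# infos = load_submit_infos("my-org/submit_infos", "./submit_infos", "submit_infos.json")
```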
src/backend.py
CHANGED

```diff
@@ -13,15 +13,17 @@ from air_benchmark.evaluation_utils.evaluator import Evaluator
 
 from src.envs import (
     API,
-    ZIP_CACHE_DIR,
-    SEARCH_RESULTS_REPO, RESULTS_REPO,
+    ZIP_CACHE_DIR,
+    SEARCH_RESULTS_REPO, RESULTS_REPO, SUBMIT_INFOS_REPO,
     make_clickable_model
 )
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(
     level=logging.WARNING,
-
+    datefmt='%Y-%m-%d %H:%M:%S',
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    force=True
 )
 
 
@@ -156,12 +158,22 @@ def pull_search_results(
     hf_search_results_repo_dir: str,
     hf_eval_results_repo_dir: str,
     unzip_target_dir: str,
+    submit_infos_dir: str,
+    submit_infos_file_name: str = "submit_infos.json",
     k_values: List[int] = [1, 3, 5, 10, 50, 100, 1000],
     cache_dir: str = None,
     time_duration: int = 1800,
     start_commit_id: str = None
 ):
-
+    # Pull the submit infos
+    API.snapshot_download(
+        repo_id=SUBMIT_INFOS_REPO,
+        repo_type="dataset",
+        local_dir=submit_infos_dir,
+        etag_timeout=30
+    )
+
+    logger.warning(f"Start from commit: {start_commit_id}")
     if start_commit_id is not None:
         API.snapshot_download(
             repo_id=SEARCH_RESULTS_REPO,
@@ -182,7 +194,7 @@ def pull_search_results(
     )
     cur_file_paths = get_file_list(hf_search_results_repo_dir, allowed_suffixes=['.json'])
 
-
+    logger.warning("Start to pull new search results ...")
     while True:
         os.makedirs(ZIP_CACHE_DIR, exist_ok=True)
         os.makedirs(unzip_target_dir, exist_ok=True)
@@ -331,9 +343,19 @@ def pull_search_results(
         # update submit infos
         cur_file_paths = new_file_paths
         submit_infos_list = get_submit_infos_list(cur_file_paths, hf_eval_results_repo_dir)
-
+        submit_infos_save_path = os.path.join(submit_infos_dir, submit_infos_file_name)
+        with open(submit_infos_save_path, 'w', encoding='utf-8') as f:
             json.dump(submit_infos_list, f, ensure_ascii=False, indent=4)
 
+        # Commit the updated submit infos
+        API.upload_folder(
+            repo_id=SUBMIT_INFOS_REPO,
+            folder_path=submit_infos_dir,
+            path_in_repo=None,
+            commit_message="Update submission infos",
+            repo_type="dataset"
+        )
+
         # Wait for the next update
         logger.warning(f"Wait for {time_duration} seconds for the next update ...")
 
```
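The backend side is the mirror image of the app's loader: each update cycle serializes the refreshed submit infos to disk and publishes the folder back to the same dataset repo via `API.upload_folder`. A hedged sketch of that write-and-publish step, again with placeholder names rather than the Space's real configuration:

```python
import json
import os

from huggingface_hub import HfApi

api = HfApi(token=os.getenv("HF_TOKEN"))


def publish_submit_infos(submit_infos: list, repo_id: str,
                         local_dir: str, file_name: str) -> None:
    # Write the refreshed infos into the local snapshot directory ...
    os.makedirs(local_dir, exist_ok=True)
    save_path = os.path.join(local_dir, file_name)
    with open(save_path, "w", encoding="utf-8") as f:
        json.dump(submit_infos, f, ensure_ascii=False, indent=4)
    # ... then push the whole folder as a new commit on the dataset repo.
    api.upload_folder(
        repo_id=repo_id,
        folder_path=local_dir,
        path_in_repo=None,
        commit_message="Update submission infos",
        repo_type="dataset",
    )
```

Because the app's `load_submit_infos_df` pulls from the same repo that the backend publishes to, a freshly restarted Space recovers the last published submission infos instead of starting from an empty table, which appears to be the auto-loading issue the commit title describes.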
src/envs.py
CHANGED

```diff
@@ -15,13 +15,16 @@ REPO_ID = f"{OWNER}/leaderboard_backend"
 RESULTS_REPO = f"{OWNER}/eval_results"
 # repo for submitting the evaluation
 SEARCH_RESULTS_REPO = f"{OWNER}/search_results"
+# repo for storing the submission infos
+SUBMIT_INFOS_REPO = f"{OWNER}/submit_infos"
 
 # If you setup a cache later, just change HF_HOME
 CACHE_PATH = os.getenv("HF_HOME", ".")
 HF_CACHE_DIR = os.path.join(CACHE_PATH, ".cache")
 ZIP_CACHE_DIR = os.path.join(CACHE_PATH, ".zip_cache")
 
-
+SUBMIT_INFOS_DIR = os.path.join(CACHE_PATH, "submit_infos")
+SUBMIT_INFOS_FILE_NAME = "submit_infos.json"
 
 API = HfApi(token=HF_TOKEN)
 
```
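With the two new constants defined, app.py's `__main__` block (per the hunk above) passes them positionally into `pull_search_results`. A sketch of how that wiring plausibly looks, inferred from the `import multiprocessing` context line and the argument order shown in the diffs; treat it as an approximation, not the file's exact contents:

```python
import multiprocessing

from src.backend import pull_search_results
from src.envs import (
    HF_SEARCH_RESULTS_REPO_DIR, HF_EVAL_RESULTS_REPO_DIR,
    UNZIP_TARGET_DIR, SUBMIT_INFOS_DIR, SUBMIT_INFOS_FILE_NAME,
    EVAL_K_VALUES, HF_CACHE_DIR, TIME_DURATION, START_COMMIT_ID,
)

if __name__ == "__main__":
    # Run the pull/update loop in a separate process so the UI stays responsive.
    # Argument order follows the pull_search_results signature in src/backend.py.
    process = multiprocessing.Process(
        target=pull_search_results,
        args=(
            HF_SEARCH_RESULTS_REPO_DIR,
            HF_EVAL_RESULTS_REPO_DIR,
            UNZIP_TARGET_DIR,
            SUBMIT_INFOS_DIR,
            SUBMIT_INFOS_FILE_NAME,
            EVAL_K_VALUES,
            HF_CACHE_DIR,
            TIME_DURATION,
            START_COMMIT_ID,
        ),
    )
    process.start()
```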