hanhainebula committed
Commit f58e22b · 1 Parent(s): 40aae8d

fix issue: auto-loading submit infos

Files changed (3):
  1. app.py (+19 -6)
  2. src/backend.py (+28 -6)
  3. src/envs.py (+4 -1)
app.py CHANGED
@@ -7,9 +7,9 @@ import multiprocessing
 
 from src.backend import pull_search_results
 from src.envs import (
-    API, REPO_ID, START_COMMIT_ID,
-    HF_CACHE_DIR, SUBMIT_INFOS_SAVE_PATH,
-    HF_SEARCH_RESULTS_REPO_DIR, HF_EVAL_RESULTS_REPO_DIR,
+    API, START_COMMIT_ID,
+    HF_CACHE_DIR, SUBMIT_INFOS_DIR, SUBMIT_INFOS_FILE_NAME,
+    HF_SEARCH_RESULTS_REPO_DIR, HF_EVAL_RESULTS_REPO_DIR, SUBMIT_INFOS_REPO,
     UNZIP_TARGET_DIR,
     TIME_DURATION,
     EVAL_K_VALUES,
@@ -20,7 +20,9 @@ from src.css_html_js import custom_css
 logger = logging.getLogger(__name__)
 logging.basicConfig(
     level=logging.WARNING,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    datefmt='%Y-%m-%d %H:%M:%S',
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    force=True
 )
 
 
@@ -29,8 +31,17 @@ logging.basicConfig(
 
 
 def load_submit_infos_df():
-    if os.path.exists(SUBMIT_INFOS_SAVE_PATH):
-        with open(SUBMIT_INFOS_SAVE_PATH, 'r', encoding='utf-8') as f:
+    # Pull the submit infos
+    API.snapshot_download(
+        repo_id=SUBMIT_INFOS_REPO,
+        repo_type="dataset",
+        local_dir=SUBMIT_INFOS_DIR,
+        etag_timeout=30
+    )
+    submit_infos_save_path = os.path.join(SUBMIT_INFOS_DIR, SUBMIT_INFOS_FILE_NAME)
+
+    if os.path.exists(submit_infos_save_path):
+        with open(submit_infos_save_path, 'r', encoding='utf-8') as f:
             submit_infos = json.load(f)
     else:
         submit_infos = []
@@ -65,6 +76,8 @@ if __name__ == "__main__":
         HF_SEARCH_RESULTS_REPO_DIR,
         HF_EVAL_RESULTS_REPO_DIR,
         UNZIP_TARGET_DIR,
+        SUBMIT_INFOS_DIR,
+        SUBMIT_INFOS_FILE_NAME,
         EVAL_K_VALUES,
         HF_CACHE_DIR,
         TIME_DURATION,
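
Net effect of the app.py change: load_submit_infos_df() no longer reads a pre-existing local JSON file, but first syncs it from a Hub dataset repo. A minimal standalone sketch of that flow, with the repo id and local paths as placeholders for the src/envs.py constants:

import json
import os

from huggingface_hub import HfApi

# Placeholder values standing in for the constants imported from src/envs.py
SUBMIT_INFOS_REPO = "owner/submit_infos"
SUBMIT_INFOS_DIR = "./submit_infos"
SUBMIT_INFOS_FILE_NAME = "submit_infos.json"

api = HfApi()

def load_submit_infos():
    # Sync the local folder with the dataset repo that now holds the infos
    api.snapshot_download(
        repo_id=SUBMIT_INFOS_REPO,
        repo_type="dataset",
        local_dir=SUBMIT_INFOS_DIR,
        etag_timeout=30,
    )
    save_path = os.path.join(SUBMIT_INFOS_DIR, SUBMIT_INFOS_FILE_NAME)
    # The repo may not contain the file yet on the very first run
    if os.path.exists(save_path):
        with open(save_path, "r", encoding="utf-8") as f:
            return json.load(f)
    return []

The empty-list fallback mirrors the original behavior for the case where the file does not exist yet.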
src/backend.py CHANGED
@@ -13,15 +13,17 @@ from air_benchmark.evaluation_utils.evaluator import Evaluator
 
 from src.envs import (
     API,
-    ZIP_CACHE_DIR,SUBMIT_INFOS_SAVE_PATH,
-    SEARCH_RESULTS_REPO, RESULTS_REPO,
+    ZIP_CACHE_DIR,
+    SEARCH_RESULTS_REPO, RESULTS_REPO, SUBMIT_INFOS_REPO,
     make_clickable_model
 )
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(
     level=logging.WARNING,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    datefmt='%Y-%m-%d %H:%M:%S',
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    force=True
 )
 
 
@@ -156,12 +158,22 @@ def pull_search_results(
     hf_search_results_repo_dir: str,
     hf_eval_results_repo_dir: str,
     unzip_target_dir: str,
+    submit_infos_dir: str,
+    submit_infos_file_name: str = "submit_infos.json",
     k_values: List[int] = [1, 3, 5, 10, 50, 100, 1000],
     cache_dir: str = None,
     time_duration: int = 1800,
     start_commit_id: str = None
 ):
-    print("Start from commit:", start_commit_id)
+    # Pull the submit infos
+    API.snapshot_download(
+        repo_id=SUBMIT_INFOS_REPO,
+        repo_type="dataset",
+        local_dir=submit_infos_dir,
+        etag_timeout=30
+    )
+
+    logger.warning(f"Start from commit: {start_commit_id}")
     if start_commit_id is not None:
         API.snapshot_download(
             repo_id=SEARCH_RESULTS_REPO,
@@ -182,7 +194,7 @@ def pull_search_results(
         )
     cur_file_paths = get_file_list(hf_search_results_repo_dir, allowed_suffixes=['.json'])
 
-    print("Start to pull new search results ...")
+    logger.warning("Start to pull new search results ...")
     while True:
         os.makedirs(ZIP_CACHE_DIR, exist_ok=True)
         os.makedirs(unzip_target_dir, exist_ok=True)
@@ -331,9 +343,19 @@ def pull_search_results(
         # update submit infos
         cur_file_paths = new_file_paths
         submit_infos_list = get_submit_infos_list(cur_file_paths, hf_eval_results_repo_dir)
-        with open(SUBMIT_INFOS_SAVE_PATH, 'w', encoding='utf-8') as f:
+        submit_infos_save_path = os.path.join(submit_infos_dir, submit_infos_file_name)
+        with open(submit_infos_save_path, 'w', encoding='utf-8') as f:
             json.dump(submit_infos_list, f, ensure_ascii=False, indent=4)
 
+        # Commit the updated submit infos
+        API.upload_folder(
+            repo_id=SUBMIT_INFOS_REPO,
+            folder_path=submit_infos_dir,
+            path_in_repo=None,
+            commit_message="Update submission infos",
+            repo_type="dataset"
+        )
+
         # Wait for the next update
         logger.warning(f"Wait for {time_duration} seconds for the next update ...")
 
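The backend is the other half of the round trip: after recomputing the submit infos, it writes them into the local snapshot and pushes the folder back to the Hub, so the next snapshot_download (in app.py, or after a Space restart) picks them up. A hedged sketch of that download-write-upload cycle, with placeholder values in place of the real src/envs.py constants and pull_search_results() arguments:

import json
import os

from huggingface_hub import HfApi

api = HfApi()

# Placeholders; the real values come from src/envs.py and the new
# pull_search_results() parameters.
SUBMIT_INFOS_REPO = "owner/submit_infos"
submit_infos_dir = "./submit_infos"
submit_infos_file_name = "submit_infos.json"
submit_infos_list = []  # hypothetical payload built by get_submit_infos_list()

# 1. Refresh the local snapshot so the upload starts from the latest state
api.snapshot_download(
    repo_id=SUBMIT_INFOS_REPO,
    repo_type="dataset",
    local_dir=submit_infos_dir,
    etag_timeout=30,
)

# 2. Overwrite the JSON file inside the snapshot
save_path = os.path.join(submit_infos_dir, submit_infos_file_name)
with open(save_path, "w", encoding="utf-8") as f:
    json.dump(submit_infos_list, f, ensure_ascii=False, indent=4)

# 3. Push the folder back as a new dataset commit
api.upload_folder(
    repo_id=SUBMIT_INFOS_REPO,
    folder_path=submit_infos_dir,
    path_in_repo=None,
    commit_message="Update submission infos",
    repo_type="dataset",
)

Persisting the infos in a dataset repo rather than only on local disk is presumably the fix for the auto-loading issue named in the commit message: the cached JSON now survives Space restarts.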
src/envs.py CHANGED
@@ -15,13 +15,16 @@ REPO_ID = f"{OWNER}/leaderboard_backend"
 RESULTS_REPO = f"{OWNER}/eval_results"
 # repo for submitting the evaluation
 SEARCH_RESULTS_REPO = f"{OWNER}/search_results"
+# repo for storing the submission infos
+SUBMIT_INFOS_REPO = f"{OWNER}/submit_infos"
 
 # If you setup a cache later, just change HF_HOME
 CACHE_PATH = os.getenv("HF_HOME", ".")
 HF_CACHE_DIR = os.path.join(CACHE_PATH, ".cache")
 ZIP_CACHE_DIR = os.path.join(CACHE_PATH, ".zip_cache")
 
-SUBMIT_INFOS_SAVE_PATH = os.path.join(CACHE_PATH, "submit_infos.json")
+SUBMIT_INFOS_DIR = os.path.join(CACHE_PATH, "submit_infos")
+SUBMIT_INFOS_FILE_NAME = "submit_infos.json"
 
 API = HfApi(token=HF_TOKEN)
 
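
envs.py swaps the single SUBMIT_INFOS_SAVE_PATH for a (repo, directory, file name) triple, so the same file can be addressed both locally and on the Hub. A small sketch of how the new constants resolve, with a hypothetical OWNER:

import os

OWNER = "example-org"  # hypothetical; the real value is set earlier in src/envs.py
CACHE_PATH = os.getenv("HF_HOME", ".")

SUBMIT_INFOS_REPO = f"{OWNER}/submit_infos"                  # dataset repo on the Hub
SUBMIT_INFOS_DIR = os.path.join(CACHE_PATH, "submit_infos")  # local snapshot directory
SUBMIT_INFOS_FILE_NAME = "submit_infos.json"                 # file name in both places

print(SUBMIT_INFOS_DIR)  # "./submit_infos" when HF_HOME is unset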