Commit f58e22b · committed by hanhainebula
Parent: 40aae8d

fix issue: auto-loading submit infos

Files changed:
- app.py (+19 -6)
- src/backend.py (+28 -6)
- src/envs.py (+4 -1)
app.py
CHANGED

```diff
@@ -7,9 +7,9 @@ import multiprocessing
 
 from src.backend import pull_search_results
 from src.envs import (
-    API,
-    HF_CACHE_DIR,
-    HF_SEARCH_RESULTS_REPO_DIR, HF_EVAL_RESULTS_REPO_DIR,
+    API, START_COMMIT_ID,
+    HF_CACHE_DIR, SUBMIT_INFOS_DIR, SUBMIT_INFOS_FILE_NAME,
+    HF_SEARCH_RESULTS_REPO_DIR, HF_EVAL_RESULTS_REPO_DIR, SUBMIT_INFOS_REPO,
     UNZIP_TARGET_DIR,
     TIME_DURATION,
     EVAL_K_VALUES,
@@ -20,7 +20,9 @@ from src.css_html_js import custom_css
 logger = logging.getLogger(__name__)
 logging.basicConfig(
     level=logging.WARNING,
-
+    datefmt='%Y-%m-%d %H:%M:%S',
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    force=True
 )
 
 
@@ -29,8 +31,17 @@ logging.basicConfig(
 
 
 def load_submit_infos_df():
-
-
+    # Pull the submit infos
+    API.snapshot_download(
+        repo_id=SUBMIT_INFOS_REPO,
+        repo_type="dataset",
+        local_dir=SUBMIT_INFOS_DIR,
+        etag_timeout=30
+    )
+    submit_infos_save_path = os.path.join(SUBMIT_INFOS_DIR, SUBMIT_INFOS_FILE_NAME)
+
+    if os.path.exists(submit_infos_save_path):
+        with open(submit_infos_save_path, 'r', encoding='utf-8') as f:
             submit_infos = json.load(f)
     else:
         submit_infos = []
@@ -65,6 +76,8 @@ if __name__ == "__main__":
         HF_SEARCH_RESULTS_REPO_DIR,
         HF_EVAL_RESULTS_REPO_DIR,
         UNZIP_TARGET_DIR,
+        SUBMIT_INFOS_DIR,
+        SUBMIT_INFOS_FILE_NAME,
         EVAL_K_VALUES,
         HF_CACHE_DIR,
         TIME_DURATION,
```
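Taken together, the new `load_submit_infos_df` body is a pull-then-read pattern: download the latest snapshot of the submit-infos dataset repo, then read the JSON file if it exists and fall back to an empty list otherwise. Below is a minimal, self-contained sketch of that pattern using `huggingface_hub` directly; the repo id, directory, and file name are placeholders, not the Space's actual configuration:

```python
import json
import os

from huggingface_hub import HfApi

api = HfApi(token=os.getenv("HF_TOKEN"))


def load_submit_infos(repo_id: str, local_dir: str, file_name: str) -> list:
    # Pull the latest snapshot of the dataset repo that stores submit infos.
    api.snapshot_download(
        repo_id=repo_id,
        repo_type="dataset",
        local_dir=local_dir,
        etag_timeout=30,
    )
    save_path = os.path.join(local_dir, file_name)
    # Fall back to an empty list when the file has not been published yet.
    if not os.path.exists(save_path):
        return []
    with open(save_path, "r", encoding="utf-8") as f:
        return json.load(f)


# Usage (placeholder repo id):
# infos = load_submit_infos("my-org/submit_infos", "./submit_infos", "submit_infos.json")
```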
src/backend.py
CHANGED

```diff
@@ -13,15 +13,17 @@ from air_benchmark.evaluation_utils.evaluator import Evaluator
 
 from src.envs import (
     API,
-    ZIP_CACHE_DIR,
-    SEARCH_RESULTS_REPO, RESULTS_REPO,
+    ZIP_CACHE_DIR,
+    SEARCH_RESULTS_REPO, RESULTS_REPO, SUBMIT_INFOS_REPO,
     make_clickable_model
 )
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(
     level=logging.WARNING,
-
+    datefmt='%Y-%m-%d %H:%M:%S',
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    force=True
 )
 
 
@@ -156,12 +158,22 @@ def pull_search_results(
     hf_search_results_repo_dir: str,
     hf_eval_results_repo_dir: str,
     unzip_target_dir: str,
+    submit_infos_dir: str,
+    submit_infos_file_name: str = "submit_infos.json",
     k_values: List[int] = [1, 3, 5, 10, 50, 100, 1000],
     cache_dir: str = None,
     time_duration: int = 1800,
     start_commit_id: str = None
 ):
-
+    # Pull the submit infos
+    API.snapshot_download(
+        repo_id=SUBMIT_INFOS_REPO,
+        repo_type="dataset",
+        local_dir=submit_infos_dir,
+        etag_timeout=30
+    )
+
+    logger.warning(f"Start from commit: {start_commit_id}")
     if start_commit_id is not None:
         API.snapshot_download(
             repo_id=SEARCH_RESULTS_REPO,
@@ -182,7 +194,7 @@ def pull_search_results(
     )
     cur_file_paths = get_file_list(hf_search_results_repo_dir, allowed_suffixes=['.json'])
 
-
+    logger.warning("Start to pull new search results ...")
     while True:
         os.makedirs(ZIP_CACHE_DIR, exist_ok=True)
         os.makedirs(unzip_target_dir, exist_ok=True)
@@ -331,9 +343,19 @@ def pull_search_results(
         # update submit infos
         cur_file_paths = new_file_paths
         submit_infos_list = get_submit_infos_list(cur_file_paths, hf_eval_results_repo_dir)
-
+        submit_infos_save_path = os.path.join(submit_infos_dir, submit_infos_file_name)
+        with open(submit_infos_save_path, 'w', encoding='utf-8') as f:
             json.dump(submit_infos_list, f, ensure_ascii=False, indent=4)
 
+        # Commit the updated submit infos
+        API.upload_folder(
+            repo_id=SUBMIT_INFOS_REPO,
+            folder_path=submit_infos_dir,
+            path_in_repo=None,
+            commit_message="Update submission infos",
+            repo_type="dataset"
+        )
+
         # Wait for the next update
         logger.warning(f"Wait for {time_duration} seconds for the next update ...")
 
```
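The backend side is the mirror image of the app's loader: each update cycle serializes the refreshed submit infos to disk and publishes the folder back to the same dataset repo via `API.upload_folder`. A hedged sketch of that write-and-publish step, again with placeholder names rather than the Space's real configuration:

```python
import json
import os

from huggingface_hub import HfApi

api = HfApi(token=os.getenv("HF_TOKEN"))


def publish_submit_infos(submit_infos: list, repo_id: str,
                         local_dir: str, file_name: str) -> None:
    # Write the refreshed infos into the local snapshot directory ...
    os.makedirs(local_dir, exist_ok=True)
    save_path = os.path.join(local_dir, file_name)
    with open(save_path, "w", encoding="utf-8") as f:
        json.dump(submit_infos, f, ensure_ascii=False, indent=4)
    # ... then push the whole folder as a new commit on the dataset repo.
    api.upload_folder(
        repo_id=repo_id,
        folder_path=local_dir,
        path_in_repo=None,
        commit_message="Update submission infos",
        repo_type="dataset",
    )
```

Because the app's `load_submit_infos_df` pulls from the same repo that the backend publishes to, a freshly restarted Space recovers the last published submission infos instead of starting from an empty table, which appears to be the auto-loading issue the commit title describes.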
src/envs.py
CHANGED

```diff
@@ -15,13 +15,16 @@ REPO_ID = f"{OWNER}/leaderboard_backend"
 RESULTS_REPO = f"{OWNER}/eval_results"
 # repo for submitting the evaluation
 SEARCH_RESULTS_REPO = f"{OWNER}/search_results"
+# repo for storing the submission infos
+SUBMIT_INFOS_REPO = f"{OWNER}/submit_infos"
 
 # If you setup a cache later, just change HF_HOME
 CACHE_PATH = os.getenv("HF_HOME", ".")
 HF_CACHE_DIR = os.path.join(CACHE_PATH, ".cache")
 ZIP_CACHE_DIR = os.path.join(CACHE_PATH, ".zip_cache")
 
-
+SUBMIT_INFOS_DIR = os.path.join(CACHE_PATH, "submit_infos")
+SUBMIT_INFOS_FILE_NAME = "submit_infos.json"
 
 API = HfApi(token=HF_TOKEN)
 
```
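With the two new constants defined, app.py's `__main__` block (per the hunk above) passes them positionally into `pull_search_results`. A sketch of how that wiring plausibly looks, inferred from the `import multiprocessing` context line and the argument order shown in the diffs; treat it as an approximation, not the file's exact contents:

```python
import multiprocessing

from src.backend import pull_search_results
from src.envs import (
    HF_SEARCH_RESULTS_REPO_DIR, HF_EVAL_RESULTS_REPO_DIR,
    UNZIP_TARGET_DIR, SUBMIT_INFOS_DIR, SUBMIT_INFOS_FILE_NAME,
    EVAL_K_VALUES, HF_CACHE_DIR, TIME_DURATION, START_COMMIT_ID,
)

if __name__ == "__main__":
    # Run the pull/update loop in a separate process so the UI stays responsive.
    # Argument order follows the pull_search_results signature in src/backend.py.
    process = multiprocessing.Process(
        target=pull_search_results,
        args=(
            HF_SEARCH_RESULTS_REPO_DIR,
            HF_EVAL_RESULTS_REPO_DIR,
            UNZIP_TARGET_DIR,
            SUBMIT_INFOS_DIR,
            SUBMIT_INFOS_FILE_NAME,
            EVAL_K_VALUES,
            HF_CACHE_DIR,
            TIME_DURATION,
            START_COMMIT_ID,
        ),
    )
    process.start()
```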