hanhainebula committed
Commit d6979e5 · 1 Parent(s): 77e581d

init commit: upload backend code

Files changed (4)
  1. app.py +71 -58
  2. requirements.txt +15 -1
  3. src/backend.py +284 -0
  4. src/envs.py +35 -0
app.py CHANGED
@@ -1,63 +1,76 @@
+import os
 import gradio as gr
-from huggingface_hub import InferenceClient
-
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
+import multiprocessing
+
+from src.backend import pull_search_results
+from src.envs import (
+    API, REPO_ID, START_COMMIT_ID,
+    LOG_DIR, HF_CACHE_DIR,
+    HF_SEARCH_RESULTS_REPO_DIR, HF_EVAL_RESULTS_REPO_DIR,
+    UNZIP_TARGET_DIR,
+    TIME_DURATION,
+    EVAL_K_VALUES,
 )
 
+def restart_space():
+    API.restart_space(repo_id=REPO_ID)
+
+
+def get_log_files():
+    return sorted([f for f in os.listdir(LOG_DIR) if f.endswith('.log')])
+
+
+def refresh_log_files():
+    return get_log_files()
+
+
+def display_log_content(selected_file):
+    if selected_file:
+        with open(os.path.join(LOG_DIR, selected_file), 'r', encoding='utf-8') as file:
+            return file.read()
+    return "No log file selected"
+
 
 if __name__ == "__main__":
-    demo.launch()
+    process = multiprocessing.Process(
+        target=pull_search_results,
+        args=(
+            HF_SEARCH_RESULTS_REPO_DIR,
+            HF_EVAL_RESULTS_REPO_DIR,
+            UNZIP_TARGET_DIR,
+            EVAL_K_VALUES,
+            HF_CACHE_DIR,
+            TIME_DURATION,
+            START_COMMIT_ID,
+        ),
+    )
+    process.start()
+
+    with gr.Blocks() as demo:
+        gr.Markdown("## Select a log file to view its content")
+
+        log_file_dropdown = gr.Dropdown(
+            choices=refresh_log_files(),
+            label="Select log file",
+            interactive=True,
+        )
+        log_content_box = gr.Textbox(
+            label="Log content",
+            lines=20,
+            interactive=False,
+        )
+        refresh_button = gr.Button(
+            value="Refresh log files",
+        )
+
+        log_file_dropdown.change(
+            fn=display_log_content,
+            inputs=log_file_dropdown,
+            outputs=log_content_box,
+        )
+        refresh_button.click(
+            fn=refresh_log_files,
+            outputs=log_file_dropdown,
+        )
+
+    demo.launch()
requirements.txt CHANGED
@@ -1 +1,15 @@
-huggingface_hub==0.22.2
+APScheduler>=3.10.1
+black>=23.11.0
+click>=8.1.3
+datasets>=2.14.5
+gradio>=4.29.0
+gradio_client>=0.16.1
+huggingface-hub>=0.18.0
+numpy>=1.24.2
+pandas>=2.0.0
+python-dateutil>=2.8.2
+requests>=2.31.0
+tqdm>=4.65.0
+accelerate>=0.24.1
+socksio>=1.0.0
+air-benchmark>=0.0.4
src/backend.py ADDED
@@ -0,0 +1,284 @@
+import os
+import json
+import time
+import shutil
+import logging
+import zipfile
+from typing import List, Optional
+from collections import defaultdict
+
+from air_benchmark.tasks.tasks import check_benchmark_version
+from air_benchmark.evaluation_utils.data_loader import DataLoader
+from air_benchmark.evaluation_utils.evaluator import Evaluator
+
+from src.envs import (
+    API,
+    LOG_DIR, ZIP_CACHE_DIR,
+    SEARCH_RESULTS_REPO, RESULTS_REPO
+)
+
+log_file = os.path.join(LOG_DIR, f"backend_{time.strftime('%Y-%m-%d_%H-%M-%S')}.log")
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(
+    filename=log_file,
+    filemode='w',
+    level=logging.WARNING,
+    datefmt='%Y-%m-%d %H:%M:%S',
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+
+
+def compute_metrics(
+    benchmark_version: str,
+    search_results_save_dir: str,
+    k_values: List[int] = [1, 3, 5, 10, 50, 100, 1000],
+    cache_dir: Optional[str] = None,
+):
+    data_loader = DataLoader(benchmark_version, cache_dir=cache_dir)
+    evaluator = Evaluator(data_loader)
+
+    eval_results = evaluator.evaluate_results(search_results_save_dir, k_values=k_values)
+    return eval_results
+
+
+def save_evaluation_results(
+    eval_results: dict,
+    save_path: str,
+    model_name: str,
+    reranker_name: str,
+    model_link: Optional[str] = None,
+    reranker_link: Optional[str] = None,
+    is_anonymous: bool = False,
+    timestamp: str = None,
+    revision: str = None,
+):
+    results = defaultdict(list)
+    configs = {}
+
+    for task_type, task_type_results in eval_results.items():
+        for domain, domain_results in task_type_results.items():
+            for lang, lang_results in domain_results.items():
+                for dataset_name, task_results in lang_results.items():
+                    for metric, metric_val in task_results.items():
+                        _key = f"{model_name}_{reranker_name}_{task_type}_{metric}"
+                        results[_key].append({
+                            "domain": domain,
+                            "lang": lang,
+                            "dataset": dataset_name,
+                            "value": metric_val,
+                        })
+                        configs[_key] = {
+                            "retrieval_model": model_name,
+                            "retrieval_model_link": model_link,
+                            "reranking_model": reranker_name,
+                            "reranking_model_link": reranker_link,
+                            "task": task_type,
+                            "metric": metric,
+                            "timestamp": timestamp,
+                            "is_anonymous": is_anonymous,
+                            "revision": revision,
+                        }
+
+    results_list = []
+    for k, result in results.items():
+        config = configs[k]
+        results_list.append({
+            "config": config,
+            "results": result
+        })
+    with open(save_path, 'w', encoding='utf-8') as f:
+        json.dump(results_list, f, ensure_ascii=False, indent=4)
+
+
+def get_file_list(dir_path: str, allowed_suffixes: List[str] = None) -> List[str]:
+    file_paths = set()
+    if os.path.exists(dir_path) and os.path.isdir(dir_path):
+        for root, _, files in os.walk(dir_path):
+            for file in files:
+                if allowed_suffixes is None or any(
+                    file.endswith(suffix) for suffix in allowed_suffixes
+                ):
+                    file_paths.add(os.path.abspath(os.path.join(root, file)))
+    return file_paths
+
+
+def get_zip_file_path(zip_file_name: str):
+    zip_file_path = None
+    for root, _, files in os.walk(ZIP_CACHE_DIR):
+        for file in files:
+            if file == zip_file_name:
+                zip_file_path = os.path.abspath(os.path.join(root, file))
+                break
+    return zip_file_path
+
+
+def pull_search_results(
+    hf_search_results_repo_dir: str,
+    hf_eval_results_repo_dir: str,
+    unzip_target_dir: str,
+    k_values: List[int] = [1, 3, 5, 10, 50, 100, 1000],
+    cache_dir: str = None,
+    time_duration: int = 1800,
+    start_commit_id: str = None
+):
+    if start_commit_id is not None:
+        API.snapshot_download(
+            repo_id=SEARCH_RESULTS_REPO,
+            repo_type="dataset",
+            revision=start_commit_id,
+            local_dir=hf_search_results_repo_dir,
+            etag_timeout=30,
+            allow_patterns=['*.json']
+        )
+        cur_file_paths = get_file_list(hf_search_results_repo_dir, allowed_suffixes=['.json'])
+    else:
+        cur_file_paths = get_file_list(hf_search_results_repo_dir, allowed_suffixes=['.json'])
+
+    while True:
+        try:
+            API.snapshot_download(
+                repo_id=RESULTS_REPO,
+                repo_type="dataset",
+                local_dir=hf_eval_results_repo_dir,
+                etag_timeout=30
+            )
+            API.snapshot_download(
+                repo_id=SEARCH_RESULTS_REPO,
+                repo_type="dataset",
+                local_dir=hf_search_results_repo_dir,
+                etag_timeout=30,
+                allow_patterns=['*.json']
+            )
+        except Exception as e:
+            logger.error(f"Failed to download the search results or evaluation results: {e}")
+            logger.error(f"Wait for {time_duration} seconds for the next update ...")
+            time.sleep(time_duration)
+            continue
+
+        commit_infos_dict = defaultdict(list)
+
+        new_file_paths = get_file_list(hf_search_results_repo_dir, allowed_suffixes=['.json'])
+        added_file_paths = new_file_paths - cur_file_paths
+        for metadata_file_path in sorted(list(added_file_paths)):
+            with open(metadata_file_path, 'r', encoding='utf-8') as f:
+                metadata = json.load(f)
+
+            model_name = metadata['model_name']
+            model_link = None if not metadata['model_url'] else metadata['model_url']
+            reranker_name = metadata['reranker_name']
+            reranker_link = None if not metadata['reranker_url'] else metadata['reranker_url']
+            benchmark_version = metadata['version']
+
+            try:
+                check_benchmark_version(benchmark_version)
+            except ValueError:
+                logger.error(f"Invalid benchmark version `{benchmark_version}` in file `{metadata_file_path}`. Skip this commit.")
+                continue
+
+            file_name = os.path.basename(metadata_file_path).split('.')[0]
+            zip_file_name = f"{file_name}.zip"
+
+            try:
+                API.snapshot_download(
+                    repo_id=SEARCH_RESULTS_REPO,
+                    repo_type="dataset",
+                    local_dir=ZIP_CACHE_DIR,
+                    etag_timeout=30,
+                    allow_patterns=[zip_file_name]
+                )
+                zip_file_path = get_zip_file_path(zip_file_name)
+                assert zip_file_path is not None
+            except Exception as e:
+                logger.error(f"Failed to download the zip file `{zip_file_name}`: {e}")
+                continue
+
+            unzip_target_path = os.path.join(unzip_target_dir, benchmark_version, file_name)
+            os.makedirs(unzip_target_path, exist_ok=True)
+            try:
+                with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
+                    zip_ref.extractall(unzip_target_path)
+            except Exception as e:
+                logger.error(f"Failed to unzip the search results `{file_name}`: {e}")
+                continue
+
+            commit_infos_dict[benchmark_version].append({
+                "model_name": model_name,
+                "model_link": model_link,
+                "reranker_name": reranker_name,
+                "reranker_link": reranker_link,
+                "is_anonymous": metadata['is_anonymous'],
+                "file_name": file_name,
+                "timestamp": metadata['timestamp'],
+                "revision": metadata['revision'],
+                "search_results_dir": unzip_target_path
+            })
+
+        # Sort the search results by timestamp
+        for benchmark_version in commit_infos_dict:
+            commit_infos_dict[benchmark_version].sort(key=lambda x: int(os.path.basename(x["search_results_dir"]).split('-')[0]))
+
+        # Save the evaluation results
+        update_flag = False
+        new_models_set = set()
+        for benchmark_version, commit_infos in commit_infos_dict.items():
+            eval_results_dir = os.path.join(hf_eval_results_repo_dir, benchmark_version)
+            os.makedirs(eval_results_dir, exist_ok=True)
+
+            for commit_info in commit_infos:
+                try:
+                    eval_results = compute_metrics(
+                        benchmark_version,
+                        commit_info['search_results_dir'],
+                        k_values=k_values,
+                        cache_dir=cache_dir,
+                    )
+                except KeyError as e:
+                    logger.error(f"KeyError: {e}. Skip this commit: {commit_info['file_name']}")
+                    continue
+
+                save_dir = os.path.join(eval_results_dir, commit_info['model_name'], commit_info['reranker_name'])
+                os.makedirs(save_dir, exist_ok=True)
+                results_save_path = os.path.join(save_dir, f"results_{commit_info['file_name']}.json")
+                save_evaluation_results(eval_results,
+                                        results_save_path,
+                                        commit_info['model_name'],
+                                        commit_info['reranker_name'],
+                                        model_link=commit_info['model_link'],
+                                        reranker_link=commit_info['reranker_link'],
+                                        is_anonymous=commit_info['is_anonymous'],
+                                        timestamp=commit_info['timestamp'],
+                                        revision=commit_info['revision'])
+                new_models_set.add(f"{commit_info['model_name']}_{commit_info['reranker_name']}")
+
+                update_flag = True
+
+        # Commit the updated evaluation results
+        if update_flag:
+            commit_message = "Update evaluation results\nNew models added in this update:\n"
+            for new_model in new_models_set:
+                commit_message += f"\t- {new_model}\n"
+
+            API.upload_folder(
+                repo_id=RESULTS_REPO,
+                folder_path=hf_eval_results_repo_dir,
+                path_in_repo=None,
+                commit_message=commit_message,
+                repo_type="dataset"
+            )
+            logger.warning("Evaluation results updated and pushed to the remote repository.")
+
+            # Print the new models
+            logger.warning("=====================================")
+            logger.warning("New models added in this update:")
+            for new_model in new_models_set:
+                logger.warning("\t" + new_model)
+
+        # Clean the cache
+        shutil.rmtree(ZIP_CACHE_DIR)
+        shutil.rmtree(unzip_target_dir)
+
+        # Wait for the next update
+        logger.warning(f"Wait for {time_duration} seconds for the next update ...")
+        cur_file_paths = new_file_paths
+        time.sleep(time_duration)
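
Note on the submission format consumed above: each new *.json file pulled from SEARCH_RESULTS_REPO is read as a metadata record, and a zip archive with the same base name is expected to hold the actual search results. A purely illustrative sketch of such a record, written as a Python dict (field names mirror the reads in pull_search_results; all values are hypothetical):

# Illustrative only: field names come from the metadata reads in src/backend.py,
# the values are made up and not part of this commit.
metadata = {
    "model_name": "my-retrieval-model",    # hypothetical model identifier
    "model_url": "https://huggingface.co/my-org/my-retrieval-model",
    "reranker_name": "NoReranker",         # hypothetical reranker identifier
    "reranker_url": "",                    # empty/missing URL is mapped to None above
    "version": "...",                      # must pass check_benchmark_version
    "is_anonymous": False,
    "timestamp": "...",
    "revision": "...",
}
# The matching archive is expected as "<metadata file base name>.zip" and is
# unpacked into UNZIP_TARGET_DIR/<version>/<base name> before evaluation.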
src/envs.py ADDED
@@ -0,0 +1,35 @@
+import os
+from huggingface_hub import HfApi
+
+
+# Info to change for your repository
+# ----------------------------------
+HF_TOKEN = os.environ.get("HF_TOKEN", "")  # A read/write token for your org
+START_COMMIT_ID = os.environ.get("START_COMMIT_ID", None)
+
+OWNER = "AIR-Bench"  # "nan" # Change to your org - don't forget to create a results and request dataset, with the correct format!
+# ----------------------------------
+
+REPO_ID = f"{OWNER}/leaderboard_backend"
+# repo for storing the evaluation results
+RESULTS_REPO = f"{OWNER}/eval_results"
+# repo for submitting the evaluation
+SEARCH_RESULTS_REPO = f"{OWNER}/search_results"
+
+# If you setup a cache later, just change HF_HOME
+CACHE_PATH = os.getenv("HF_HOME", ".")
+HF_CACHE_DIR = os.path.join(CACHE_PATH, ".cache")
+ZIP_CACHE_DIR = os.path.join(CACHE_PATH, ".zip_cache")
+
+LOG_DIR = os.path.join(CACHE_PATH, "logs")
+
+API = HfApi(token=HF_TOKEN)
+
+HF_SEARCH_RESULTS_REPO_DIR = os.path.join(CACHE_PATH, "search_results")
+HF_EVAL_RESULTS_REPO_DIR = os.path.join(CACHE_PATH, "eval_results")
+
+UNZIP_TARGET_DIR = os.path.join(CACHE_PATH, "unzip_target_dir")
+
+TIME_DURATION = 300  # seconds
+
+EVAL_K_VALUES = [1, 3, 5, 10, 50, 100, 1000]
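
For reference, the polling loop added in src/backend.py can also be invoked directly with the constants above, outside the multiprocessing wrapper used in app.py; a minimal sketch, assuming HF_TOKEN is set and the repositories configured in src/envs.py are reachable:

import os

from src.envs import (
    LOG_DIR, HF_CACHE_DIR,
    HF_SEARCH_RESULTS_REPO_DIR, HF_EVAL_RESULTS_REPO_DIR,
    UNZIP_TARGET_DIR, EVAL_K_VALUES, TIME_DURATION, START_COMMIT_ID,
)

# src.backend opens a log file under LOG_DIR at import time, so make sure it exists first.
os.makedirs(LOG_DIR, exist_ok=True)

from src.backend import pull_search_results

# Blocks forever: downloads new search results, evaluates them, and pushes
# the updated evaluation results every TIME_DURATION seconds.
pull_search_results(
    HF_SEARCH_RESULTS_REPO_DIR,
    HF_EVAL_RESULTS_REPO_DIR,
    UNZIP_TARGET_DIR,
    k_values=EVAL_K_VALUES,
    cache_dir=HF_CACHE_DIR,
    time_duration=TIME_DURATION,
    start_commit_id=START_COMMIT_ID,
)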