Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
pminervini
committed on
Commit
·
9fbeaa1
1
Parent(s):
f6e5d38
update
Browse files
- backend-cli.py +3 -2
- src/leaderboard/read_evals.py +0 -10
backend-cli.py
CHANGED
@@ -112,13 +112,14 @@ def process_finished_requests() -> bool:
|
|
112 |
result_name: str = request_to_result_name(eval_request)
|
113 |
|
114 |
# Check the corresponding result
|
115 |
-
|
|
|
116 |
|
117 |
# Iterate over tasks and, if we do not have results for a task, run the relevant evaluations
|
118 |
for task in TASKS_HARNESS:
|
119 |
task_name = task.benchmark
|
120 |
|
121 |
-
if task_name not in eval_result.results:
|
122 |
eval_request: EvalRequest = result_name_to_request[result_name]
|
123 |
|
124 |
set_eval_request(api=API, eval_request=eval_request, set_to_status=RUNNING_STATUS, hf_repo=QUEUE_REPO,
|
|
|
112 |
result_name: str = request_to_result_name(eval_request)
|
113 |
|
114 |
# Check the corresponding result
|
115 |
+
from typing import Optional
|
116 |
+
eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
|
117 |
|
118 |
# Iterate over tasks and, if we do not have results for a task, run the relevant evaluations
|
119 |
for task in TASKS_HARNESS:
|
120 |
task_name = task.benchmark
|
121 |
|
122 |
+
if eval_result is None or task_name not in eval_result.results:
|
123 |
eval_request: EvalRequest = result_name_to_request[result_name]
|
124 |
|
125 |
set_eval_request(api=API, eval_request=eval_request, set_to_status=RUNNING_STATUS, hf_repo=QUEUE_REPO,
|
src/leaderboard/read_evals.py
CHANGED
@@ -103,16 +103,6 @@ class EvalResult:
|
|
103 |
mean_acc = np.mean(accs) * 100.0
|
104 |
results[task.benchmark] = mean_acc
|
105 |
|
106 |
-
# print(json_filepath, results)
|
107 |
-
|
108 |
-
# XXX
|
109 |
-
# if 'nq_open' not in results:
|
110 |
-
# results['nq_open'] = 0.0
|
111 |
-
|
112 |
-
# XXX
|
113 |
-
# if 'triviaqa' not in results:
|
114 |
-
# results['triviaqa'] = 0.0
|
115 |
-
|
116 |
return EvalResult(eval_name=result_key, full_model=full_model, org=org, model=model, results=results,
|
117 |
precision=precision, revision=config.get("model_sha", ""), still_on_hub=still_on_hub,
|
118 |
architecture=architecture)
|
|
|
103 |
mean_acc = np.mean(accs) * 100.0
|
104 |
results[task.benchmark] = mean_acc
|
105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
return EvalResult(eval_name=result_key, full_model=full_model, org=org, model=model, results=results,
|
107 |
precision=precision, revision=config.get("model_sha", ""), still_on_hub=still_on_hub,
|
108 |
architecture=architecture)
|