pminervini committed on
Commit
9fbeaa1
·
1 Parent(s): f6e5d38
Files changed (2) hide show
  1. backend-cli.py +3 -2
  2. src/leaderboard/read_evals.py +0 -10
backend-cli.py CHANGED
@@ -112,13 +112,14 @@ def process_finished_requests() -> bool:
112
  result_name: str = request_to_result_name(eval_request)
113
 
114
  # Check the corresponding result
115
- eval_result: EvalResult = result_name_to_result[result_name]
 
116
 
117
  # Iterate over tasks and, if we do not have results for a task, run the relevant evaluations
118
  for task in TASKS_HARNESS:
119
  task_name = task.benchmark
120
 
121
- if task_name not in eval_result.results:
122
  eval_request: EvalRequest = result_name_to_request[result_name]
123
 
124
  set_eval_request(api=API, eval_request=eval_request, set_to_status=RUNNING_STATUS, hf_repo=QUEUE_REPO,
 
112
  result_name: str = request_to_result_name(eval_request)
113
 
114
  # Check the corresponding result
115
+ from typing import Optional
116
+ eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
117
 
118
  # Iterate over tasks and, if we do not have results for a task, run the relevant evaluations
119
  for task in TASKS_HARNESS:
120
  task_name = task.benchmark
121
 
122
+ if eval_result is None or task_name not in eval_result.results:
123
  eval_request: EvalRequest = result_name_to_request[result_name]
124
 
125
  set_eval_request(api=API, eval_request=eval_request, set_to_status=RUNNING_STATUS, hf_repo=QUEUE_REPO,
src/leaderboard/read_evals.py CHANGED
@@ -103,16 +103,6 @@ class EvalResult:
103
  mean_acc = np.mean(accs) * 100.0
104
  results[task.benchmark] = mean_acc
105
 
106
- # print(json_filepath, results)
107
-
108
- # XXX
109
- # if 'nq_open' not in results:
110
- #  results['nq_open'] = 0.0
111
-
112
- # XXX
113
- # if 'triviaqa' not in results:
114
- # results['triviaqa'] = 0.0
115
-
116
  return EvalResult(eval_name=result_key, full_model=full_model, org=org, model=model, results=results,
117
  precision=precision, revision=config.get("model_sha", ""), still_on_hub=still_on_hub,
118
  architecture=architecture)
 
103
  mean_acc = np.mean(accs) * 100.0
104
  results[task.benchmark] = mean_acc
105
 
 
 
 
 
 
 
 
 
 
 
106
  return EvalResult(eval_name=result_key, full_model=full_model, org=org, model=model, results=results,
107
  precision=precision, revision=config.get("model_sha", ""), still_on_hub=still_on_hub,
108
  architecture=architecture)