Commit · a88d51c ("update")
Parent(s): 9bfc5f2

Files changed:
- backend-cli.py +5 -3
- src/leaderboard/read_evals.py +4 -3
- submit-cli.py +1 -1
backend-cli.py
CHANGED
@@ -3,6 +3,7 @@
 import os
 import json
 
+import random
 from datetime import datetime
 
 from huggingface_hub import snapshot_download
@@ -99,7 +100,6 @@ def process_finished_requests() -> bool:
     # Sort the evals by priority (first submitted first run)
     eval_requests: list[EvalRequest] = sort_models_by_priority(api=API, models=eval_requests)
 
-    import random
     random.shuffle(eval_requests)
 
     from src.leaderboard.read_evals import get_raw_eval_results
@@ -115,8 +115,11 @@ def process_finished_requests() -> bool:
         from typing import Optional
         eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
 
+        task_lst = TASKS_HARNESS.copy()
+        random.shuffle(task_lst)
+
         # Iterate over tasks and, if we do not have results for a task, run the relevant evaluations
-        for task in TASKS_HARNESS:
+        for task in task_lst:
            task_name = task.benchmark
 
            if eval_result is None or task_name not in eval_result.results:
@@ -145,7 +148,6 @@ def process_pending_requests() -> bool:
     # Sort the evals by priority (first submitted first run)
     eval_requests = sort_models_by_priority(api=API, models=eval_requests)
 
-    import random
     random.shuffle(eval_requests)
 
     print(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")
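Taken together, these hunks hoist the random import to module scope and shuffle a copy of the harness task list before iterating, so each pass works through the tasks in a random order while still skipping tasks that already have results. Below is a minimal, self-contained sketch of that pattern; Task, the benchmark names, and existing_results are illustrative stand-ins, not the objects backend-cli.py actually imports.

import random
from dataclasses import dataclass


@dataclass
class Task:
    benchmark: str  # stand-in for the task objects held in TASKS_HARNESS


# Illustrative stand-in for the TASKS_HARNESS list used by backend-cli.py.
TASKS_HARNESS = [Task("hellaswag"), Task("arc_challenge"), Task("truthfulqa_mc")]


def run_missing_tasks(existing_results: dict) -> list:
    """Shuffle a copy of the task list, then launch only tasks with no result yet."""
    task_lst = TASKS_HARNESS.copy()  # copy so the module-level list keeps its order
    random.shuffle(task_lst)         # randomize the per-pass order, as in the commit

    launched = []
    for task in task_lst:
        task_name = task.benchmark
        if task_name not in existing_results:  # mirrors `task_name not in eval_result.results`
            launched.append(task_name)         # the real script would start the evaluation here
    return launched


if __name__ == "__main__":
    print(run_missing_tasks({"hellaswag": 0.61}))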
src/leaderboard/read_evals.py
CHANGED
@@ -133,7 +133,7 @@ class EvalResult:
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
         except Exception:
-            print(f"Could not find request file for {self.org}/{self.model}")
+            print(f"Could not find request file for {self.org}/{self.model} -- path: {requests_path}")
 
     def is_complete(self) -> bool:
         for task in Tasks:
@@ -169,7 +169,7 @@ class EvalResult:
 
 
 def get_request_file_for_model(requests_path, model_name, precision):
-    """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
+    """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED and RUNNING"""
     request_files = os.path.join(
         requests_path,
         f"{model_name}_eval_request_*.json",
@@ -179,11 +179,12 @@ def get_request_file_for_model(requests_path, model_name, precision):
     # Select correct request file (precision)
     request_file = ""
    request_files = sorted(request_files, reverse=True)
+    # print('XXX', request_files)
     for tmp_request_file in request_files:
         with open(tmp_request_file, "r") as f:
             req_content = json.load(f)
             if (
-                req_content["status"] in ["FINISHED"]
+                req_content["status"] in ["FINISHED", "RUNNING"]
                 and req_content["precision"] == precision.split(".")[-1]
             ):
                 request_file = tmp_request_file
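The read_evals.py changes widen the status filter in get_request_file_for_model so that requests tagged RUNNING are accepted alongside FINISHED, and the error message now reports the requests path that was searched. A rough sketch of the widened selection rule follows; it operates on already-loaded request dicts instead of the globbed JSON files so it runs standalone, and the submitted_time sort key stands in for sorting filenames in reverse order.

from typing import Optional


def select_request(requests: list, precision: str) -> Optional[dict]:
    """Pick a request whose status is FINISHED or RUNNING and whose precision matches."""
    request = None
    for req in sorted(requests, key=lambda r: r["submitted_time"], reverse=True):
        if (
            req["status"] in ["FINISHED", "RUNNING"]          # RUNNING is newly accepted by this commit
            and req["precision"] == precision.split(".")[-1]  # e.g. "Precision.bfloat16" -> "bfloat16"
        ):
            request = req  # no break, matching the original loop: the last match in sorted order wins
    return request


if __name__ == "__main__":
    reqs = [
        {"status": "RUNNING", "precision": "bfloat16", "submitted_time": "2024-01-02T00:00:00Z"},
        {"status": "FAILED", "precision": "bfloat16", "submitted_time": "2024-01-03T00:00:00Z"},
    ]
    print(select_request(reqs, "Precision.bfloat16"))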
submit-cli.py
CHANGED
@@ -118,7 +118,7 @@ def main():
 
     filtered_model_lst = sorted([m for m in model_lst if custom_filter(m)], key=lambda m: m.downloads, reverse=True)
 
-    for i in range(min(
+    for i in range(min(200, len(filtered_model_lst))):
         model = filtered_model_lst[i]
 
         print(f'Considering {model.id} ..')
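The submit-cli.py change caps the candidate loop at the 200 most-downloaded models that pass the filter. A small sketch of the same pattern is below; ModelRecord is a toy stand-in for the huggingface_hub model objects, and the custom_filter step is omitted.

from dataclasses import dataclass


@dataclass
class ModelRecord:
    id: str
    downloads: int


def top_candidates(model_lst: list, limit: int = 200) -> list:
    """Sort by downloads (descending) and keep at most `limit` entries."""
    filtered_model_lst = sorted(model_lst, key=lambda m: m.downloads, reverse=True)
    return [filtered_model_lst[i] for i in range(min(limit, len(filtered_model_lst)))]


if __name__ == "__main__":
    models = [ModelRecord("org/model-a", 10), ModelRecord("org/model-b", 500)]
    for model in top_candidates(models):
        print(f'Considering {model.id} ..')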