Commit 95f7c57 (parent: 0b78364), committed by [email protected]
Commit message: update

Files changed:
- app.py (+3, -2)
- src/leaderboard/read_evals.py (+4, -4)
- src/populate.py (+2, -2)
app.py
CHANGED
@@ -29,6 +29,7 @@ from src.display.utils import (
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
+from src.about import Tasks, AssetTasks
 
 
 def restart_space():
@@ -52,11 +53,11 @@ except Exception:
     restart_space()
 
 
-LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
+LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS, Tasks)
 
 print(ASSET_COLS)
 
-ASSET_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, ASSET_COLS, ASSET_BENCHMARK_COLS)
+ASSET_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, ASSET_COLS, ASSET_BENCHMARK_COLS, AssetTasks)
 
 
 (
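app.py now imports the two task enums and passes them into get_leaderboard_df, so the same loader builds both the main and the asset leaderboard. The enum definitions themselves are not part of this diff; the sketch below shows what src/about.py plausibly contains, following the usual leaderboard-template layout (the Task fields and the example members are assumptions, not this repository's actual benchmarks).

# Hypothetical sketch of src/about.py -- not part of this commit.
from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str  # key to look up under "results" in a result JSON file
    metric: str     # metric name whose scores get averaged
    col_name: str   # column header shown in the leaderboard table


class Tasks(Enum):
    # Example members only; the real benchmark/metric names live in src/about.py.
    task0 = Task("benchmark_a", "acc", "Benchmark A")
    task1 = Task("benchmark_b", "acc", "Benchmark B")


class AssetTasks(Enum):
    # Separate enum driving the asset leaderboard's benchmark columns.
    asset0 = Task("asset_benchmark_a", "acc", "Asset Benchmark A")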
src/leaderboard/read_evals.py
CHANGED
@@ -33,7 +33,7 @@ class EvalResult:
     still_on_hub: bool = False
 
     @classmethod
-    def init_from_json_file(self, json_filepath):
+    def init_from_json_file(self, json_filepath, task_class):
         """Inits the result from the specific model result file"""
         with open(json_filepath) as fp:
             data = json.load(fp)
@@ -68,7 +68,7 @@ class EvalResult:
 
         # Extract results available in this file (some results are split in several files)
         results = {}
-        for task in Tasks:
+        for task in task_class:
             task = task.value
 
             # We average all scores of a given metric (not all metrics are present in all files)
@@ -154,7 +154,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
     return request_file
 
 
-def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
+def get_raw_eval_results(results_path: str, requests_path: str, task_class) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
     model_result_filepaths = []
     for root, _, files in os.walk(results_path):
@@ -173,7 +173,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
-        eval_result = EvalResult.init_from_json_file(model_result_filepath)
+        eval_result = EvalResult.init_from_json_file(model_result_filepath, task_class)
         eval_result.update_with_request_file(requests_path)
 
         # Store results of same eval together
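With the enum passed down, init_from_json_file iterates whichever task set it is given instead of a hard-coded Tasks enum, and get_raw_eval_results simply forwards it. A minimal usage sketch, assuming the module layout shown in the other files of this commit (paths come from src.envs, enums from src.about):

# Usage sketch -- mirrors how src/populate.py calls this after the change.
from src.about import AssetTasks, Tasks
from src.envs import EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH
from src.leaderboard.read_evals import get_raw_eval_results

# One call per leaderboard; only the task enum differs.
main_results = get_raw_eval_results(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, Tasks)
asset_results = get_raw_eval_results(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, AssetTasks)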
src/populate.py
CHANGED
@@ -8,10 +8,10 @@ from src.display.utils import AutoEvalColumn, EvalQueueColumn
 from src.leaderboard.read_evals import get_raw_eval_results
 
 
-def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
+def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list, task_class) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     print(results_path, requests_path)
-    raw_data = get_raw_eval_results(results_path, requests_path)
+    raw_data = get_raw_eval_results(results_path, requests_path, task_class)
     print(raw_data)
     all_data_json = [v.to_dict() for v in raw_data]
 
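get_leaderboard_df only forwards task_class to get_raw_eval_results, while cols and benchmark_cols keep shaping the resulting DataFrame. One code path therefore serves both leaderboards, and a further leaderboard would only need its own enum plus column lists. The rest of the function is not shown in this diff; in the standard leaderboard template it roughly does the following (a sketch under that assumption, not necessarily this repository's exact code; build_display_df is a made-up name for illustration):

# Sketch of how the tail of get_leaderboard_df typically uses cols/benchmark_cols
# in the standard leaderboard template (not shown in this diff; names may differ here).
import pandas as pd


def build_display_df(raw_data, cols: list, benchmark_cols: list) -> pd.DataFrame:
    all_data_json = [v.to_dict() for v in raw_data]  # EvalResult objects -> row dicts
    df = pd.DataFrame.from_records(all_data_json)
    df = df[cols].round(decimals=2)                  # keep and round the display columns
    df = df.dropna(subset=benchmark_cols)            # drop rows missing any benchmark score
    return df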