[email protected] committed on
Commit 95f7c57 · 1 Parent(s): 0b78364
Files changed (3)
  1. app.py +3 -2
  2. src/leaderboard/read_evals.py +4 -4
  3. src/populate.py +2 -2
app.py CHANGED
@@ -29,6 +29,7 @@ from src.display.utils import (
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
+from src.about import Tasks, AssetTasks
 
 
 def restart_space():
@@ -52,11 +53,11 @@ except Exception:
     restart_space()
 
 
-LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
+LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS, Tasks)
 
 print(ASSET_COLS)
 
-ASSET_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, ASSET_COLS, ASSET_BENCHMARK_COLS)
+ASSET_LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, ASSET_COLS, ASSET_BENCHMARK_COLS, AssetTasks)
 
 
 (
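This commit parameterizes the leaderboard pipeline by a task enum: the same get_leaderboard_df now builds both the main leaderboard (COLS / BENCHMARK_COLS with Tasks) and the asset leaderboard (ASSET_COLS / ASSET_BENCHMARK_COLS with AssetTasks). The diff assumes src/about.py already exposes both enums; below is a minimal sketch of what that module might look like, following the usual demo-leaderboard layout. The benchmark and metric names are placeholders, not the Space's actual tasks.

# Hypothetical sketch of src/about.py; placeholder task definitions only.
from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str  # key under "results" in each result JSON file
    metric: str     # metric name inside that benchmark's entry
    col_name: str   # column header shown on the leaderboard


class Tasks(Enum):
    # Tasks for the main leaderboard (placeholder names).
    task0 = Task("benchmark_a", "acc", "Benchmark A")
    task1 = Task("benchmark_b", "acc_norm", "Benchmark B")


class AssetTasks(Enum):
    # Tasks for the asset leaderboard (placeholder names).
    task0 = Task("asset_benchmark", "acc", "Asset Benchmark")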
src/leaderboard/read_evals.py CHANGED
@@ -33,7 +33,7 @@ class EvalResult:
     still_on_hub: bool = False
 
     @classmethod
-    def init_from_json_file(self, json_filepath):
+    def init_from_json_file(self, json_filepath, task_class):
         """Inits the result from the specific model result file"""
         with open(json_filepath) as fp:
             data = json.load(fp)
@@ -68,7 +68,7 @@ class EvalResult:
 
         # Extract results available in this file (some results are split in several files)
         results = {}
-        for task in Tasks:
+        for task in task_class:
             task = task.value
 
             # We average all scores of a given metric (not all metrics are present in all files)
@@ -154,7 +154,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
     return request_file
 
 
-def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
+def get_raw_eval_results(results_path: str, requests_path: str, task_class) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
     model_result_filepaths = []
    for root, _, files in os.walk(results_path):
@@ -173,7 +173,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
-        eval_result = EvalResult.init_from_json_file(model_result_filepath)
+        eval_result = EvalResult.init_from_json_file(model_result_filepath, task_class)
         eval_result.update_with_request_file(requests_path)
 
         # Store results of same eval together
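With this change, init_from_json_file and get_raw_eval_results receive the enum from the caller instead of reading the module-level Tasks, so the extraction loop scores whichever enum it is handed. Below is a standalone sketch of that loop under the assumption that result files keep the template's {"results": {benchmark: {metric: value}}} shape; extract_task_scores is a hypothetical name used for illustration, not a function in the repo.

# Sketch only: mirrors the parameterized loop inside init_from_json_file.
import numpy as np


def extract_task_scores(data: dict, task_class) -> dict:
    """Average the scores for each task in the given enum (Tasks or AssetTasks)."""
    results = {}
    for task in task_class:
        task = task.value
        # Average all scores of a given metric (not every metric is in every file).
        accs = np.array([
            v.get(task.metric, None)
            for k, v in data["results"].items()
            if task.benchmark == k
        ])
        if accs.size == 0 or any(acc is None for acc in accs):
            continue
        results[task.benchmark] = np.mean(accs) * 100.0
    return results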
src/populate.py CHANGED
@@ -8,10 +8,10 @@ from src.display.utils import AutoEvalColumn, EvalQueueColumn
 from src.leaderboard.read_evals import get_raw_eval_results
 
 
-def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
+def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list, task_class) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     print(results_path, requests_path)
-    raw_data = get_raw_eval_results(results_path, requests_path)
+    raw_data = get_raw_eval_results(results_path, requests_path, task_class)
     print(raw_data)
     all_data_json = [v.to_dict() for v in raw_data]
 
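Only the signature and the call into get_raw_eval_results change here; the rest of get_leaderboard_df, which is not shown in this diff, is where cols and benchmark_cols get used. As a hedged sketch of how that unchanged tail plausibly shapes the frame (build_leaderboard_frame is a hypothetical stand-in based on the standard template, not this repo's actual code):

# Sketch under assumptions: each per-model dict already carries one entry per
# display column, and rows missing any benchmark score are dropped.
import pandas as pd


def build_leaderboard_frame(all_data_json: list[dict], cols: list, benchmark_cols: list) -> pd.DataFrame:
    df = pd.DataFrame.from_records(all_data_json)
    df = df[cols].round(decimals=2)                  # keep and order the display columns
    df = df[df[benchmark_cols].notna().all(axis=1)]  # drop rows missing any benchmark score
    return df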