Spaces:
Running
Running
[email protected]
committed on
Commit
·
2d95777
1
Parent(s):
c4a1d6e
update
Browse files- app.py +1 -1
- src/populate.py +2 -2
app.py
CHANGED
|
@@ -138,7 +138,7 @@ with demo:
|
|
| 138 |
with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
| 139 |
leaderboard = init_leaderboard(LEADERBOARD_DF, AutoEvalColumn)
|
| 140 |
|
| 141 |
-
with gr.TabItem("
|
| 142 |
leaderboard = init_leaderboard(ASSET_LEADERBOARD_DF, AutoEvalColumnAsset)
|
| 143 |
|
| 144 |
with gr.TabItem("π Performance Plot", elem_id="llm-benchmark-tab-table", id=2):
|
|
|
|
| 138 |
with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
| 139 |
leaderboard = init_leaderboard(LEADERBOARD_DF, AutoEvalColumn)
|
| 140 |
|
| 141 |
+
with gr.TabItem("🛠️ Asset Benchmark", elem_id="llm-benchmark-asset-tab-table", id=1):
|
| 142 |
leaderboard = init_leaderboard(ASSET_LEADERBOARD_DF, AutoEvalColumnAsset)
|
| 143 |
|
| 144 |
with gr.TabItem("📊 Performance Plot", elem_id="llm-benchmark-tab-table", id=2):
|
src/populate.py
CHANGED
|
@@ -4,7 +4,7 @@ import os
|
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
from src.display.formatting import has_no_nan_values, make_clickable_model
|
| 7 |
-
from src.display.utils import
|
| 8 |
from src.leaderboard.read_evals import get_raw_eval_results
|
| 9 |
|
| 10 |
|
|
@@ -17,7 +17,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
| 17 |
|
| 18 |
df = pd.DataFrame.from_records(all_data_json)
|
| 19 |
print(df)
|
| 20 |
-
df = df.sort_values(by=[
|
| 21 |
df = df[cols].round(decimals=2)
|
| 22 |
|
| 23 |
# filter out if any of the benchmarks have not been produced
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
|
| 6 |
from src.display.formatting import has_no_nan_values, make_clickable_model
|
| 7 |
+
from src.display.utils import AutoEvalColumnAsset, EvalQueueColumn
|
| 8 |
from src.leaderboard.read_evals import get_raw_eval_results
|
| 9 |
|
| 10 |
|
|
|
|
| 17 |
|
| 18 |
df = pd.DataFrame.from_records(all_data_json)
|
| 19 |
print(df)
|
| 20 |
+
df = df.sort_values(by=[AutoEvalColumnAsset.average.name], ascending=False)
|
| 21 |
df = df[cols].round(decimals=2)
|
| 22 |
|
| 23 |
# filter out if any of the benchmarks have not been produced
|