Spaces: Running · Commit b5c23a1 · Parent: d0ab546
major changes to space

Browse files:
- .github/workflows/sync-to-hf.yml  +0 -55
- app.py  +280 -125
- data_loader.py  +72 -3
- json_to_parquet.py  +0 -228
- scripts/convert_to_parquet.py  +0 -142
- scripts/json_to_parquet.py  +0 -222
- ui_components.py  +761 -134
- upload_to_hf.py  +0 -122
.github/workflows/sync-to-hf.yml
DELETED
@@ -1,55 +0,0 @@
-name: Sync to HuggingFace Dataset
-
-on:
-  push:
-    branches: [main]
-    paths:
-      - 'data/**/*.json'
-  workflow_dispatch: # Allow manual trigger
-
-jobs:
-  sync-to-huggingface:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 2
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.11'
-
-      - name: Install dependencies
-        run: |
-          pip install datasets huggingface_hub pandas pyarrow
-
-      - name: Convert Changed JSONs to Parquet (Optimized)
-        env:
-          HF_DATASET_REPO: deepmage121/eee_test
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-        run: |
-          echo "Detecting changed leaderboards..."
-          python scripts/convert_to_parquet.py
-
-      - name: Upload Changed Parquets to HuggingFace
-        env:
-          HF_DATASET_REPO: deepmage121/eee_test
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-        run: |
-          echo "Uploading changed parquets..."
-          python scripts/upload_to_hf.py
-
-      - name: Report status
-        if: success()
-        run: |
-          echo "Successfully synced to HuggingFace dataset"
-          echo "View at: https://huggingface.co/datasets/deepmage121/eee_test"
-          if [ -f parquet_output/changed_leaderboards.json ]; then
-            echo ""
-            echo "Changes processed:"
-            cat parquet_output/changed_leaderboards.json
-          fi
-
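The deleted workflow can still be reproduced by hand. A minimal local sketch, assuming the two repo scripts exist and a valid write token is available in HF_TOKEN (the local setup itself is hypothetical; repo name and script paths are taken from the workflow above):

```python
# Sketch: run the same sync steps the deleted workflow performed, but locally.
# Assumes scripts/convert_to_parquet.py and scripts/upload_to_hf.py are present
# and HF_TOKEN holds a valid Hugging Face write token (hypothetical local setup).
import os
import subprocess

env = dict(
    os.environ,
    HF_DATASET_REPO="deepmage121/eee_test",          # repo name from the workflow
    HF_TOKEN=os.environ.get("HF_TOKEN", ""),
)

# Step 1: detect changed leaderboards and rebuild only their parquet files
subprocess.run(["python", "scripts/convert_to_parquet.py"], env=env, check=True)

# Step 2: push the regenerated parquet files to the HF dataset repo
subprocess.run(["python", "scripts/upload_to_hf.py"], env=env, check=True)
```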
app.py
CHANGED

Old version:
@@ -6,66 +6,40 @@ import gradio as gr
 import pandas as pd
 from pathlib import Path

-# Import custom modules
 from data_loader import (
     load_hf_dataset_on_startup,
     get_available_leaderboards,
     get_eval_metadata,
     build_leaderboard_table,
     clear_cache,
     DATA_DIR
 )
-from ui_components import

-def export_leaderboard_to_json(selected_leaderboard):
-    """Export current leaderboard to JSON files in a zip using parquet_to_folder."""
-    if not selected_leaderboard:
-        return None
-
-    import tempfile
-    import shutil
-    import zipfile
-    from json_to_parquet import parquet_to_folder
-
-    try:
-        # Find the parquet file in DATA_DIR
-        parquet_path = DATA_DIR / selected_leaderboard / f"{selected_leaderboard}.parquet"
-
-        if not parquet_path.exists():
-            print(f"Parquet file not found: {parquet_path}")
-            return None
-
-        # Create temp directory for export
-        with tempfile.TemporaryDirectory() as temp_dir:
-            temp_path = Path(temp_dir)
-            output_dir = temp_path / "json_export"
-            output_dir.mkdir()
-
-            # Use the round-trip functionality from json_to_parquet
-            parquet_to_folder(str(parquet_path), str(output_dir))
-
-            # Create zip file
-            zip_path = temp_path / f"{selected_leaderboard}_export.zip"
-            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
-                for json_file in output_dir.rglob("*.json"):
-                    arcname = json_file.relative_to(output_dir)
-                    zipf.write(json_file, arcname)
-
-            # Copy to a permanent location for download
-            final_zip = Path(tempfile.gettempdir()) / f"{selected_leaderboard}_export.zip"
-            shutil.copy(zip_path, final_zip)
-
-            return str(final_zip)
-    except Exception as e:
-        print(f"Export error: {e}")
-        return None

-def update_leaderboard_table(selected_leaderboard, search_query="", progress=gr.Progress()):
     """Loads and aggregates data for the selected leaderboard."""
     if not selected_leaderboard:
-        return

     metadata = get_eval_metadata(selected_leaderboard)

@@ -73,110 +47,261 @@ def update_leaderboard_table(selected_leaderboard, search_query="", progress=gr.
         progress(value, desc=desc)

     df = build_leaderboard_table(selected_leaderboard, "", progress_callback)
-    total_count = len(df)

-    # Apply search filter (searches all columns)
     if search_query and not df.empty:
         mask = df.astype(str).apply(lambda row: row.str.contains(search_query, case=False, na=False).any(), axis=1)
         df = df[mask]

 load_hf_dataset_on_startup()

-# Build
-with gr.Blocks(title="Eval

-    with gr.
     )
     gr.Markdown("""
-    **Data submissions happen via GitHub Pull Requests:**
-
-    1. **Fork** [evaleval/every_eval_ever](https://github.com/evaleval/every_eval_ever)
-    2. **Add your JSON files** to `data/<leaderboard>/<developer>/<model>/`
-    3. **Create a Pull Request**
-    4. **Automated validation** checks your data
-    5. **After merge**: GitHub Actions automatically syncs to HuggingFace
-    6. **Refresh this page** to see your data!
-
-    └── developer_name/
-        └── model_name/
-            └── {uuid}.json
-    ```
-
-    📖 [**Full Submission Guide**](https://github.com/evaleval/every_eval_ever#contributor-guide) |
-    📋 [**JSON Schema**](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json) |
-    👀 [**See Examples**](https://github.com/evaleval/every_eval_ever/tree/main/data)
     """)
-    init_df, init_search_msg, init_header, init_metrics = update_leaderboard_table(initial_value)

-        label=None,
-        interactive=False,
-        wrap=True,
-        elem_classes="dataframe"
-    )
-
-    metrics_view = gr.HTML(value=init_metrics)

-    #
     leaderboard_selector.change(
         fn=update_leaderboard_table,
-        inputs=[leaderboard_selector, search_box],
-        outputs=[leaderboard_table,
     )

     search_box.input(
         fn=update_leaderboard_table,
-        inputs=[leaderboard_selector, search_box],
-        outputs=[leaderboard_table,
     )

     refresh_btn.click(

@@ -184,10 +309,40 @@ Each JSON file should follow the schema and be named with a unique UUID.
         outputs=[leaderboard_selector]
     ).then(
         fn=lambda: clear_cache()
     ).then(
         fn=update_leaderboard_table,
-        inputs=[leaderboard_selector, search_box],
-        outputs=[leaderboard_table,
     )

 DATA_DIR.mkdir(exist_ok=True)

New version:
 import pandas as pd
 from pathlib import Path

 from data_loader import (
     load_hf_dataset_on_startup,
     get_available_leaderboards,
     get_eval_metadata,
     build_leaderboard_table,
     clear_cache,
+    search_model_across_leaderboards,
+    get_all_model_names,
     DATA_DIR
 )
+from ui_components import (
+    get_theme,
+    get_custom_css,
+    format_leaderboard_header,
+    format_metric_details,
+    format_model_card,
+)

+PAGE_SIZE = 50


+def update_leaderboard_table(selected_leaderboard, search_query="", current_page=1, sort_column=None, progress=gr.Progress()):
     """Loads and aggregates data for the selected leaderboard."""
     if not selected_leaderboard:
+        return (
+            pd.DataFrame(),
+            format_leaderboard_header(None, {}),
+            format_metric_details(None, {}),
+            gr.update(choices=[], value=None),
+            gr.update(interactive=False),
+            gr.update(interactive=False),
+            gr.update(choices=[], value=None),
+            "0 / 0",
+        )

     metadata = get_eval_metadata(selected_leaderboard)

         progress(value, desc=desc)

     df = build_leaderboard_table(selected_leaderboard, "", progress_callback)

     if search_query and not df.empty:
         mask = df.astype(str).apply(lambda row: row.str.contains(search_query, case=False, na=False).any(), axis=1)
         df = df[mask]

+    filtered_count = len(df)
+
+    if sort_column and sort_column in df.columns and not df.empty:
+        df = df.sort_values(by=sort_column, ascending=False, na_position='last')
+
+    total_pages = max(1, (filtered_count + PAGE_SIZE - 1) // PAGE_SIZE) if filtered_count > 0 else 1
+    current_page = max(1, min(current_page, total_pages))
+
+    start_idx = (current_page - 1) * PAGE_SIZE
+    end_idx = start_idx + PAGE_SIZE
+    df_paginated = df.iloc[start_idx:end_idx] if not df.empty else df
+
+    page_choices = [str(i) for i in range(1, total_pages + 1)]
+    page_dropdown = gr.update(choices=page_choices, value=str(current_page))
+    prev_btn = gr.update(interactive=(current_page > 1))
+    next_btn = gr.update(interactive=(current_page < total_pages))
+    page_info = f"{current_page} / {total_pages}"

+    sort_choices = list(df.columns) if not df.empty else []
+    default_sort = sort_column if sort_column and sort_column in sort_choices else ("Average" if "Average" in sort_choices else (sort_choices[0] if sort_choices else None))
+    sort_column_update = gr.update(choices=sort_choices, value=default_sort)
+
+    return (
+        df_paginated,
+        format_leaderboard_header(selected_leaderboard, metadata),
+        format_metric_details(selected_leaderboard, metadata),
+        page_dropdown,
+        prev_btn,
+        next_btn,
+        sort_column_update,
+        page_info,
+    )


+def search_model(model_query):
+    """Search for a model and return formatted card."""
+    if not model_query or len(model_query) < 2:
+        return """
+        <div class="no-results">
+            <h3>Search for a model</h3>
+            <p>Enter a model name to see its benchmarks across all leaderboards</p>
+        </div>
+        """
+
+    results, _ = search_model_across_leaderboards(model_query)
+
+    if not results:
+        return f"""
+        <div class="no-results">
+            <h3>No results for "{model_query}"</h3>
+            <p>Try a different model name or check the spelling</p>
+        </div>
+        """
+
+    # Use the first matching model
+    model_name = list(results.keys())[0]
+    model_data = results[model_name]
+
+    return format_model_card(model_name, model_data)
+
+
+def get_model_suggestions(query):
+    """Get model name suggestions for autocomplete."""
+    if not query or len(query) < 2:
+        return gr.update(choices=[])
+
+    _, matches = search_model_across_leaderboards(query)
+    return gr.update(choices=matches[:15])
+
+
+# Load data at startup
 load_hf_dataset_on_startup()

+# Build interface
+with gr.Blocks(title="Every Eval Ever", theme=get_theme(), css=get_custom_css()) as demo:

+    # Header
+    gr.HTML("""
+    <div class="app-header">
+        <div class="logo-mark">E³</div>
+        <div class="brand">
+            <h1>Every Eval Ever</h1>
+            <span class="tagline">Browse and compare model benchmarks</span>
+        </div>
+        <div class="header-right">
+            <span class="version-badge">beta</span>
+        </div>
+    </div>
+    """)

+    with gr.Tabs():
+        # === TAB 1: Leaderboard View ===
+        with gr.TabItem("📊 Leaderboards"):
+            with gr.Row(elem_classes="controls-bar"):
+                initial_choices = get_available_leaderboards()
+                initial_value = initial_choices[0] if initial_choices else None
+
+                with gr.Column(scale=2, min_width=200):
+                    leaderboard_selector = gr.Dropdown(
+                        choices=initial_choices,
+                        value=initial_value,
+                        label="Leaderboard",
+                        interactive=True
+                    )
+                with gr.Column(scale=3, min_width=250):
+                    search_box = gr.Textbox(
+                        label="Filter",
+                        placeholder="Filter models...",
+                        show_label=True
+                    )
+                with gr.Column(scale=1, min_width=100):
+                    refresh_btn = gr.Button("↻ Refresh", variant="secondary", size="sm")
+
+            init_df, init_header, init_metrics, init_page_dropdown, init_prev, init_next, init_sort_cols, init_page_info = update_leaderboard_table(initial_value, "", 1, "Average")
+
+            header_view = gr.HTML(value=init_header)
+
+            # Hidden sort state (default to Average)
+            sort_column_dropdown = gr.Dropdown(
+                choices=init_sort_cols.get("choices", []) if hasattr(init_sort_cols, 'get') else [],
+                value=init_sort_cols.get("value") if hasattr(init_sort_cols, 'get') else None,
+                visible=False,
             )
+
+            leaderboard_table = gr.Dataframe(
+                value=init_df,
+                label=None,
+                interactive=False,
+                wrap=False,
+                elem_classes="dataframe",
+                column_widths=["28%", "12%", "7%", "7%"]
+            )
+
+            # Pagination below table - centered
+            with gr.Row(elem_classes="pagination-bar"):
+                prev_btn = gr.Button("←", variant="secondary", size="sm", min_width=60)
+                page_info = gr.Markdown(value=init_page_info, elem_classes="page-info")
+                next_btn = gr.Button("→", variant="secondary", size="sm", min_width=60)
+                page_dropdown = gr.Dropdown(
+                    choices=[],
+                    value="1",
+                    visible=False,
+                )
+
+            metrics_view = gr.HTML(value=init_metrics)
+
+        # === TAB 2: Model View ===
+        with gr.TabItem("🔍 Model Lookup"):
+            gr.Markdown("### Find a model's benchmarks across all leaderboards")
+
+            with gr.Row(elem_classes="controls-bar"):
+                with gr.Column(scale=4):
+                    model_search_dropdown = gr.Dropdown(
+                        choices=[],
+                        label="Model Name",
+                        allow_custom_value=True,
+                        interactive=True,
+                        filterable=True,
+                    )
+                with gr.Column(scale=1, min_width=100):
+                    model_search_btn = gr.Button("Search", variant="primary", size="sm")
+
+            model_card_view = gr.HTML(value="""
+            <div class="no-results">
+                <h3>Search for a model</h3>
+                <p>Start typing to see suggestions, then select a model</p>
+            </div>
+            """)
+
+    # Submission guide
+    with gr.Accordion("📤 How to Submit Data", open=False):
         gr.Markdown("""
+**Submit via GitHub Pull Request:**

+1. Fork [evaleval/every_eval_ever](https://github.com/evaleval/every_eval_ever)
+2. Add JSON files to `data/<leaderboard>/<developer>/<model>/`
+3. Open a PR — automated validation runs on submission
+4. After merge, data syncs to HuggingFace automatically

+[Submission Guide](https://github.com/evaleval/every_eval_ever#contributor-guide) · [JSON Schema](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json)
         """)

+    # === State ===
+    current_page_state = gr.State(value=1)
+    sort_column_state = gr.State(value="Average")

+    def go_prev(current):
+        return max(1, current - 1)

+    def go_next(current):
+        return current + 1

+    def reset_page():
+        return 1

+    # === Leaderboard Events ===
     leaderboard_selector.change(
+        fn=reset_page, outputs=[current_page_state]
+    ).then(
+        fn=lambda: "Average", outputs=[sort_column_state]
+    ).then(
         fn=update_leaderboard_table,
+        inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state],
+        outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
     )

     search_box.input(
+        fn=reset_page, outputs=[current_page_state]
+    ).then(
+        fn=update_leaderboard_table,
+        inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state],
+        outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
+    )
+
+    sort_column_dropdown.change(
+        fn=lambda col: col,
+        inputs=[sort_column_dropdown],
+        outputs=[sort_column_state]
+    ).then(
+        fn=reset_page, outputs=[current_page_state]
+    ).then(
+        fn=update_leaderboard_table,
+        inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state],
+        outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
+    )
+
+    page_dropdown.change(
+        fn=lambda p: int(p) if p else 1,
+        inputs=[page_dropdown],
+        outputs=[current_page_state]
+    ).then(
+        fn=update_leaderboard_table,
+        inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state],
+        outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
+    )
+
+    prev_btn.click(
+        fn=go_prev, inputs=[current_page_state], outputs=[current_page_state]
+    ).then(
+        fn=update_leaderboard_table,
+        inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state],
+        outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
+    )
+
+    next_btn.click(
+        fn=go_next, inputs=[current_page_state], outputs=[current_page_state]
+    ).then(
         fn=update_leaderboard_table,
+        inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state],
+        outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
     )

     refresh_btn.click(
         outputs=[leaderboard_selector]
     ).then(
         fn=lambda: clear_cache()
+    ).then(
+        fn=reset_page, outputs=[current_page_state]
+    ).then(
+        fn=lambda: "Average", outputs=[sort_column_state]
     ).then(
         fn=update_leaderboard_table,
+        inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state],
+        outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
+    )
+
+    # === Model Search Events ===
+    def update_model_suggestions(query):
+        """Update dropdown choices based on query."""
+        if not query or len(query) < 2:
+            return gr.update(choices=[])
+        _, matches = search_model_across_leaderboards(query)
+        return gr.update(choices=matches[:20])
+
+    model_search_dropdown.input(
+        fn=update_model_suggestions,
+        inputs=[model_search_dropdown],
+        outputs=[model_search_dropdown]
+    )
+
+    model_search_btn.click(
+        fn=search_model,
+        inputs=[model_search_dropdown],
+        outputs=[model_card_view]
+    )
+
+    model_search_dropdown.select(
+        fn=search_model,
+        inputs=[model_search_dropdown],
+        outputs=[model_card_view]
     )

 DATA_DIR.mkdir(exist_ok=True)
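The paging arithmetic added to update_leaderboard_table is self-contained and easy to check in isolation. A minimal sketch of the same ceiling-division and clamping logic, with illustrative names that are not part of app.py:

```python
import pandas as pd

PAGE_SIZE = 50  # same constant the new app.py defines


def paginate(df: pd.DataFrame, current_page: int, page_size: int = PAGE_SIZE):
    """Return (page_slice, clamped_page, total_pages) using the same rules as the app."""
    count = len(df)
    # Ceiling division; zero rows still yields one (empty) page.
    total_pages = max(1, (count + page_size - 1) // page_size) if count > 0 else 1
    current_page = max(1, min(current_page, total_pages))  # clamp out-of-range requests
    start = (current_page - 1) * page_size
    return df.iloc[start:start + page_size], current_page, total_pages


# e.g. 120 rows -> 3 pages; asking for page 99 falls back to the last page (20 rows)
demo_df = pd.DataFrame({"score": range(120)})
page, cur, total = paginate(demo_df, 99)
assert (cur, total, len(page)) == (3, 3, 20)
```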
data_loader.py
CHANGED

Old version:
@@ -296,11 +296,16 @@ def build_leaderboard_table(selected_leaderboard, search_query="", progress_call
     if len(eval_only_cols) > 0:
         df["Average"] = df[eval_only_cols].mean(axis=1).round(3)

     base_cols = [c for c in base_cols if c in df.columns]

-    final_cols = base_cols + sorted(eval_cols)
     df = df[final_cols]

     if "Average" in df.columns:

@@ -315,3 +320,67 @@ def clear_cache():
     """Clears all caches."""
     LEADERBOARD_CACHE.clear()

New version:
     if len(eval_only_cols) > 0:
         df["Average"] = df[eval_only_cols].mean(axis=1).round(3)

+    # Base columns: Model, Developer, Params, Average
+    # Eval columns: all evaluation scores
+    # Model detail columns: Arch, Precision (moved to end)
+    base_cols = ["Model", "Developer", "Params (B)", "Average"]
+    model_detail_cols = ["Arch", "Precision"]
+    eval_cols = [c for c in df.columns if c not in base_cols and c not in model_detail_cols]
     base_cols = [c for c in base_cols if c in df.columns]
+    model_detail_cols = [c for c in model_detail_cols if c in df.columns]

+    final_cols = base_cols + sorted(eval_cols) + model_detail_cols
     df = df[final_cols]

     if "Average" in df.columns:

     """Clears all caches."""
     LEADERBOARD_CACHE.clear()

+
+def search_model_across_leaderboards(model_query):
+    """Search for a model across all leaderboards and return aggregated results."""
+    if not model_query or not HF_DATASET_CACHE:
+        return {}, []
+
+    model_query_lower = model_query.lower().strip()
+    results = {}
+    all_matches = []
+
+    for leaderboard_name, parsed_items in HF_DATASET_CACHE.items():
+        for item in parsed_items:
+            model_id = item.get("model", "")
+            # Check if query matches model name (case insensitive, partial match)
+            if model_query_lower in model_id.lower():
+                all_matches.append(model_id)
+
+                # Exact match gets priority
+                if model_id.lower() == model_query_lower or model_id == model_query:
+                    if model_id not in results:
+                        results[model_id] = {}
+                    results[model_id][leaderboard_name] = {
+                        "developer": item.get("developer"),
+                        "params": item.get("params"),
+                        "architecture": item.get("architecture"),
+                        "precision": item.get("precision"),
+                        "results": item.get("results", {})
+                    }
+
+    # If no exact match, use partial matches
+    if not results and all_matches:
+        # Get the first partial match
+        for leaderboard_name, parsed_items in HF_DATASET_CACHE.items():
+            for item in parsed_items:
+                model_id = item.get("model", "")
+                if model_query_lower in model_id.lower():
+                    if model_id not in results:
+                        results[model_id] = {}
+                    results[model_id][leaderboard_name] = {
+                        "developer": item.get("developer"),
+                        "params": item.get("params"),
+                        "architecture": item.get("architecture"),
+                        "precision": item.get("precision"),
+                        "results": item.get("results", {})
+                    }
+
+    # Return unique matches for autocomplete
+    unique_matches = sorted(set(all_matches))[:20]  # Limit to 20 suggestions
+
+    return results, unique_matches
+
+
+def get_all_model_names():
+    """Get all unique model names across all leaderboards."""
+    if not HF_DATASET_CACHE:
+        return []
+
+    models = set()
+    for parsed_items in HF_DATASET_CACHE.values():
+        for item in parsed_items:
+            models.add(item.get("model", ""))
+
+    return sorted(models)
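A minimal sketch of how the new search helper might be consumed, assuming the HF dataset cache has already been populated by load_hf_dataset_on_startup (the query string below is illustrative):

```python
# Hypothetical usage of search_model_across_leaderboards added above.
from data_loader import load_hf_dataset_on_startup, search_model_across_leaderboards

load_hf_dataset_on_startup()  # fills HF_DATASET_CACHE from the HF dataset repo
results, suggestions = search_model_across_leaderboards("llama")  # query is illustrative

for model_id, per_leaderboard in results.items():
    for leaderboard, entry in per_leaderboard.items():
        # entry carries developer/params/architecture/precision plus the raw scores dict
        print(model_id, leaderboard, entry["results"])

print("autocomplete suggestions:", suggestions[:5])
```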
json_to_parquet.py
DELETED
@@ -1,228 +0,0 @@
-import json
-from pathlib import Path
-import pandas as pd
-
-
-def json_to_row(json_path: Path) -> dict:
-    """Convert one JSON to a single row (1 JSON = 1 row, evaluations as columns)."""
-    with open(json_path, 'r') as f:
-        data = json.load(f)
-
-    required_fields = ["schema_version", "evaluation_id", "evaluation_source", "retrieved_timestamp",
-                       "source_data", "source_metadata", "model_info", "evaluation_results"]
-    for field in required_fields:
-        if field not in data:
-            raise ValueError(f"{json_path}: Missing required field '{field}'")
-
-    if "evaluation_source_name" not in data["evaluation_source"]:
-        raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_name'")
-    if "evaluation_source_type" not in data["evaluation_source"]:
-        raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_type'")
-
-    if "source_organization_name" not in data["source_metadata"]:
-        raise ValueError(f"{json_path}: Missing required field 'source_metadata.source_organization_name'")
-    if "evaluator_relationship" not in data["source_metadata"]:
-        raise ValueError(f"{json_path}: Missing required field 'source_metadata.evaluator_relationship'")
-
-    if "name" not in data["model_info"]:
-        raise ValueError(f"{json_path}: Missing required field 'model_info.name'")
-    if "id" not in data["model_info"]:
-        raise ValueError(f"{json_path}: Missing required field 'model_info.id'")
-    if "developer" not in data["model_info"]:
-        raise ValueError(f"{json_path}: Missing required field 'model_info.developer'")
-
-    leaderboard = data["evaluation_source"]["evaluation_source_name"]
-    model = data["model_info"]["id"]
-    uuid = json_path.stem
-    developer = data["model_info"]["developer"]
-
-    # Validate evaluation results
-    for eval_result in data["evaluation_results"]:
-        if "evaluation_name" not in eval_result:
-            raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].evaluation_name'")
-        if "metric_config" not in eval_result:
-            raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config'")
-        if "score_details" not in eval_result:
-            raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details'")
-
-        if "lower_is_better" not in eval_result["metric_config"]:
-            raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config.lower_is_better'")
-        if "score" not in eval_result["score_details"]:
-            raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details.score'")
-
-    row = {
-        # Folder structure (for reconstruction)
-        "_leaderboard": leaderboard,
-        "_developer": developer,
-        "_model": model,
-        "_uuid": uuid,
-
-        # Required top-level fields
-        "schema_version": data["schema_version"],
-        "evaluation_id": data["evaluation_id"],
-        "retrieved_timestamp": data["retrieved_timestamp"],
-        "source_data": json.dumps(data["source_data"]),
-
-        # Required nested fields
-        "evaluation_source_name": data["evaluation_source"]["evaluation_source_name"],
-        "evaluation_source_type": data["evaluation_source"]["evaluation_source_type"],
-
-        "source_organization_name": data["source_metadata"]["source_organization_name"],
-        "source_organization_url": data["source_metadata"].get("source_organization_url"),
-        "source_organization_logo_url": data["source_metadata"].get("source_organization_logo_url"),
-        "evaluator_relationship": data["source_metadata"]["evaluator_relationship"],
-
-        "model_name": data["model_info"]["name"],
-        "model_id": data["model_info"]["id"],
-        "model_developer": data["model_info"]["developer"],
-        "model_inference_platform": data["model_info"].get("inference_platform"),
-
-        # Store full evaluation_results and additional_details as JSON
-        "evaluation_results": json.dumps(data["evaluation_results"]),
-        "additional_details": json.dumps(data["additional_details"]) if "additional_details" in data else None,
-    }
-
-    return row
-
-
-def add_to_parquet(json_or_folder: str, parquet_file: str):
-    """
-    Add JSON(s) to Parquet file.
-    Creates new file if it doesn't exist, appends and deduplicates if it does.
-
-    Args:
-        json_or_folder: Path to single JSON file or folder containing JSONs
-        parquet_file: Output Parquet file path
-    """
-    input_path = Path(json_or_folder)
-
-    if input_path.is_file():
-        json_files = [input_path]
-    elif input_path.is_dir():
-        json_files = list(input_path.rglob("*.json"))
-        if not json_files:
-            raise ValueError(f"No JSON files found in directory: {json_or_folder}")
-    else:
-        raise ValueError(f"Invalid input: {json_or_folder}")
-
-    print(f"Processing {len(json_files)} JSON file(s)...")
-
-    parquet_path = Path(parquet_file)
-    if parquet_path.exists():
-        existing_df = pd.read_parquet(parquet_file)
-        existing_keys = set(
-            existing_df[["_leaderboard", "_developer", "_model", "_uuid"]]
-            .apply(tuple, axis=1)
-        )
-        print(f"Found {len(existing_df)} existing rows")
-    else:
-        existing_df = None
-        existing_keys = set()
-
-    all_rows = []
-    skipped = 0
-    for i, jf in enumerate(json_files, 1):
-        if i % 100 == 0:
-            print(f"  {i}/{len(json_files)}")
-
-        row = json_to_row(jf)
-        key = (row["_leaderboard"], row["_developer"], row["_model"], row["_uuid"])
-        if key not in existing_keys:
-            all_rows.append(row)
-            existing_keys.add(key)
-        else:
-            skipped += 1
-
-    if skipped > 0:
-        print(f"  Skipped {skipped} duplicate file(s)")
-
-    # Handle case where no new rows to add
-    if not all_rows:
-        if existing_df is not None:
-            print(f"No new files to add, keeping existing {len(existing_df)} file(s)")
-            return
-        else:
-            raise ValueError("No valid JSON files to process and no existing parquet file")
-
-    new_df = pd.DataFrame(all_rows)
-
-    if existing_df is not None:
-        df = pd.concat([existing_df, new_df], ignore_index=True)
-        print(f"Added {len(new_df)} new file(s) to existing {len(existing_df)} file(s)")
-    else:
-        df = new_df
-
-    df.to_parquet(parquet_file, index=False)
-    print(f"Saved {len(df)} total file(s) to {parquet_file} ({parquet_path.stat().st_size / 1024 / 1024:.1f} MB)")
-
-
-def parquet_to_folder(parquet_file: str, output_dir: str):
-    """Reconstruct folder structure from Parquet."""
-    df = pd.read_parquet(parquet_file)
-    out = Path(output_dir)
-
-    for _, row in df.iterrows():
-        lb = row["_leaderboard"]
-        dev = row["_developer"]
-        model = row["_model"]
-        uuid = row["_uuid"]
-
-        json_data = {
-            "schema_version": row["schema_version"],
-            "evaluation_id": row["evaluation_id"],
-            "retrieved_timestamp": row["retrieved_timestamp"],
-            "source_data": json.loads(row["source_data"]),
-            "evaluation_source": {
-                "evaluation_source_name": row["evaluation_source_name"],
-                "evaluation_source_type": row["evaluation_source_type"]
-            },
-            "source_metadata": {
-                "source_organization_name": row["source_organization_name"],
-                "evaluator_relationship": row["evaluator_relationship"]
-            },
-            "model_info": {
-                "name": row["model_name"],
-                "id": row["model_id"],
-                "developer": row["model_developer"]
-            },
-            "evaluation_results": json.loads(row["evaluation_results"])
-        }
-
-        if pd.notna(row["source_organization_url"]):
-            json_data["source_metadata"]["source_organization_url"] = row["source_organization_url"]
-        if pd.notna(row["source_organization_logo_url"]):
-            json_data["source_metadata"]["source_organization_logo_url"] = row["source_organization_logo_url"]
-
-        if pd.notna(row["model_inference_platform"]):
-            json_data["model_info"]["inference_platform"] = row["model_inference_platform"]
-
-        if pd.notna(row["additional_details"]):
-            json_data["additional_details"] = json.loads(row["additional_details"])
-
-        file_path = out / lb / dev / model / f"{uuid}.json"
-        file_path.parent.mkdir(parents=True, exist_ok=True)
-        with open(file_path, 'w') as f:
-            json.dump(json_data, f, indent=2)
-
-    print(f"Reconstructed {len(df)} files to {output_dir}")
-
-
-if __name__ == "__main__":
-    import sys
-
-    if len(sys.argv) < 2:
-        print("Usage:")
-        print("  python json_to_parquet.py add <json_or_folder> <output.parquet>")
-        print("  python json_to_parquet.py export <input.parquet> <output_dir>")
-        sys.exit(1)
-
-    cmd = sys.argv[1]
-
-    if cmd == "add":
-        add_to_parquet(sys.argv[2], sys.argv[3])
-    elif cmd == "export":
-        parquet_to_folder(sys.argv[2], sys.argv[3])
-    else:
-        print(f"Unknown command: {cmd}")
scripts/convert_to_parquet.py
DELETED
@@ -1,142 +0,0 @@
-"""
-Incremental parquet conversion with HuggingFace sync.
-
-Optimized workflow:
-1. Detect changed leaderboards via git diff (instant!)
-2. Download ONLY changed parquets from HF (fast!)
-3. Re-convert ONLY changed leaderboards
-4. Ready for upload (handled by upload_to_hf.py)
-
-This avoids downloading and processing unchanged leaderboards.
-"""
-
-from pathlib import Path
-import sys
-import subprocess
-import os
-import json
-from datasets import load_dataset
-
-sys.path.insert(0, str(Path(__file__).parent.resolve().parent))
-
-from json_to_parquet import add_to_parquet
-
-HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "deepmage121/eee_test")
-
-def download_leaderboards(output_dir: Path, leaderboard_names: set[str]) -> set[str]:
-    """Download existing leaderboard parquets from HuggingFace."""
-    try:
-        dataset_dict = load_dataset(HF_DATASET_REPO)
-        downloaded: set[str] = set()
-
-        for lb in leaderboard_names:
-            if lb in dataset_dict:
-                print(f"  Downloading {lb}")
-                dataset_dict[lb].to_pandas().to_parquet(output_dir / f"{lb}.parquet", index=False)
-                downloaded.add(lb)
-            else:
-                print(f"  {lb} (new)")
-
-        print(f"Downloaded {len(downloaded)}/{len(leaderboard_names)} parquet(s)")
-        return downloaded
-
-    except Exception as e:
-        print(f"HF download failed: {e}")
-        sys.exit(1)
-
-
-def detect_modified_leaderboards() -> set[str]:
-    """Get leaderboards with changed JSONs via git diff (HEAD~1)."""
-    try:
-        result = subprocess.run(
-            ["git", "diff", "--name-only", "HEAD~1", "HEAD", "data/"],
-            capture_output=True, text=True, check=True
-        )
-
-        changed_files = result.stdout.strip().split('\n')
-        if not changed_files or changed_files == ['']:
-            print("No changes detected in data/")
-            return set()
-
-        leaderboards = {
-            Path(f).parts[1]
-            for f in changed_files
-            if f.startswith('data/') and f.endswith('.json') and len(Path(f).parts) >= 2
-        }
-        return leaderboards
-
-    except subprocess.CalledProcessError as e:
-        print(f"ERROR: Git command failed: {e}")
-        sys.exit(1)
-
-
-def convert_changed_leaderboards():
-    """
-    Optimized conversion: detect changes, download only changed, re-convert only changed.
-    """
-
-    data_dir = Path("data")
-    output_dir = Path("parquet_output")
-    output_dir.mkdir(exist_ok=True)
-
-    if not data_dir.exists():
-        print(f"Data directory not found: {data_dir}")
-        sys.exit(1)
-
-    changed_leaderboards: set[str] = detect_modified_leaderboards()
-
-    if len(changed_leaderboards) == 0:
-        print("No changes detected, nothing to upload")
-        manifest = {"changed": [], "converted": []}
-        (output_dir / "changed_leaderboards.json").write_text(json.dumps(manifest, indent=2))
-        sys.exit(0)
-
-    print(f"Detected {len(changed_leaderboards)} changed leaderboard(s):")
-    for lb in changed_leaderboards:
-        print(f"  {lb}")
-
-    downloaded = download_leaderboards(output_dir, changed_leaderboards)
-
-    converted_count = 0
-    error_count = 0
-    converted_leaderboards = []
-
-    for leaderboard_name in changed_leaderboards:
-        leaderboard_dir = os.path.join(data_dir, leaderboard_name)
-
-        parquet_path = os.path.join(output_dir, f"{leaderboard_name}.parquet")
-
-        print(f"\nConverting: {leaderboard_name}")
-
-        try:
-            add_to_parquet(json_or_folder=str(leaderboard_dir), parquet_file=str(parquet_path))
-
-            print(f"  Converted to {parquet_path}")
-            converted_count += 1
-            converted_leaderboards.append(leaderboard_name)
-
-        except Exception as e:
-            print(f"  Error: {e}")
-            error_count += 1
-
-    manifest = {
-        "changed": list(changed_leaderboards),
-        "converted": converted_leaderboards,
-        "downloaded": list(downloaded),
-        "errors": error_count
-    }
-    manifest_path = os.path.join(output_dir, "changed_leaderboards.json")
-    with open(manifest_path, 'w') as f:
-        json.dump(manifest, f, indent=2)
-
-    if error_count > 0:
-        sys.exit(1)
-
-    if converted_count == 0:
-        print("Warning: No parquet files successfully converted!")
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    convert_changed_leaderboards()
-
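The change detection above leans on the repo's fixed layout: the leaderboard is always the second path component of a changed file under data/. A small worked example of that extraction (the file paths below are illustrative):

```python
# Illustrative check of the Path.parts[1] trick used by detect_modified_leaderboards().
from pathlib import Path

changed_files = [
    "data/open_llm_leaderboard/meta/llama-3/1234.json",  # illustrative paths
    "data/helm/anthropic/claude/5678.json",
    "README.md",  # ignored: not under data/ and not a .json file
]

leaderboards = {
    Path(f).parts[1]
    for f in changed_files
    if f.startswith("data/") and f.endswith(".json") and len(Path(f).parts) >= 2
}
print(leaderboards)  # {'open_llm_leaderboard', 'helm'}
```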
scripts/json_to_parquet.py
DELETED
@@ -1,222 +0,0 @@
-"""
-Convert evaluation JSONs to Parquet for HF Datasets.
-Input: single JSON or folder of JSONs (any structure)
-Output: Parquet with all data + reconstructable folder structure
-"""
-
-import json
-from pathlib import Path
-import pandas as pd
-
-
-def json_to_row(json_path: Path) -> dict:
-    """Convert one JSON to a single row (1 JSON = 1 row, evaluations as columns)."""
-    with open(json_path, 'r') as f:
-        data = json.load(f)
-
-    required_fields = ["schema_version", "evaluation_id", "evaluation_source", "retrieved_timestamp",
-                       "source_data", "source_metadata", "model_info", "evaluation_results"]
-    for field in required_fields:
-        if field not in data:
-            raise ValueError(f"{json_path}: Missing required field '{field}'")
-
-    if "evaluation_source_name" not in data["evaluation_source"]:
-        raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_name'")
-    if "evaluation_source_type" not in data["evaluation_source"]:
-        raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_type'")
-
-    if "source_organization_name" not in data["source_metadata"]:
-        raise ValueError(f"{json_path}: Missing required field 'source_metadata.source_organization_name'")
-    if "evaluator_relationship" not in data["source_metadata"]:
-        raise ValueError(f"{json_path}: Missing required field 'source_metadata.evaluator_relationship'")
-
-    if "name" not in data["model_info"]:
-        raise ValueError(f"{json_path}: Missing required field 'model_info.name'")
-    if "id" not in data["model_info"]:
-        raise ValueError(f"{json_path}: Missing required field 'model_info.id'")
-    if "developer" not in data["model_info"]:
-        raise ValueError(f"{json_path}: Missing required field 'model_info.developer'")
-
-    leaderboard = data["evaluation_source"]["evaluation_source_name"]
-    model = data["model_info"]["id"]
-    uuid = json_path.stem
-    developer = data["model_info"]["developer"]
-
-    # Validate evaluation results
-    for eval_result in data["evaluation_results"]:
-        if "evaluation_name" not in eval_result:
-            raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].evaluation_name'")
-        if "metric_config" not in eval_result:
-            raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config'")
-        if "score_details" not in eval_result:
-            raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details'")
-
-        if "lower_is_better" not in eval_result["metric_config"]:
-            raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config.lower_is_better'")
-        if "score" not in eval_result["score_details"]:
-            raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details.score'")
-
-    row = {
-        # Folder structure (for reconstruction)
-        "_leaderboard": leaderboard,
-        "_developer": developer,
-        "_model": model,
-        "_uuid": uuid,
-
-        # Required top-level fields
-        "schema_version": data["schema_version"],
-        "evaluation_id": data["evaluation_id"],
-        "retrieved_timestamp": data["retrieved_timestamp"],
-        "source_data": json.dumps(data["source_data"]),
-
-        # Required nested fields
-        "evaluation_source_name": data["evaluation_source"]["evaluation_source_name"],
-        "evaluation_source_type": data["evaluation_source"]["evaluation_source_type"],
-
-        "source_organization_name": data["source_metadata"]["source_organization_name"],
-        "source_organization_url": data["source_metadata"].get("source_organization_url"),
-        "source_organization_logo_url": data["source_metadata"].get("source_organization_logo_url"),
-        "evaluator_relationship": data["source_metadata"]["evaluator_relationship"],
-
-        "model_name": data["model_info"]["name"],
-        "model_id": data["model_info"]["id"],
-        "model_developer": data["model_info"]["developer"],
-        "model_inference_platform": data["model_info"].get("inference_platform"),
-
-        # Store full evaluation_results and additional_details as JSON
-        "evaluation_results": json.dumps(data["evaluation_results"]),
-        "additional_details": json.dumps(data["additional_details"]) if "additional_details" in data else None,
-    }
-
-    return row
-
-
-def add_to_parquet(json_input: str, parquet_file: str):
-    """
-    Add JSON(s) to Parquet file.
-    Creates new file if it doesn't exist, appends and deduplicates if it does.
-
-    Args:
-        json_input: Path to single JSON file or folder containing JSONs
-        parquet_file: Output Parquet file path
-    """
-    input_path = Path(json_input)
-
-    if input_path.is_file():
-        json_files = [input_path]
-    elif input_path.is_dir():
-        json_files = list(input_path.rglob("*.json"))
-    else:
-        raise ValueError(f"Invalid input: {json_input}")
-
-    print(f"Processing {len(json_files)} JSON file(s)...")
-
-    parquet_path = Path(parquet_file)
-    if parquet_path.exists():
-        existing_df = pd.read_parquet(parquet_file)
-        existing_keys = set(
-            existing_df[["_leaderboard", "_developer", "_model", "_uuid"]]
-            .apply(tuple, axis=1)
-        )
-        print(f"Found {len(existing_df)} existing rows")
-    else:
-        existing_df = None
-        existing_keys = set()
-
-    all_rows = []
-    skipped = 0
-    for i, jf in enumerate(json_files, 1):
-        if i % 100 == 0:
-            print(f"  {i}/{len(json_files)}")
-
-        row = json_to_row(jf)
-        key = (row["_leaderboard"], row["_developer"], row["_model"], row["_uuid"])
-        if key not in existing_keys:
-            all_rows.append(row)
-            existing_keys.add(key)
-        else:
-            skipped += 1
-
-    if skipped > 0:
-        print(f"  Skipped {skipped} duplicate file(s)")
-
-    new_df = pd.DataFrame(all_rows)
-
-    if existing_df is not None:
-        df = pd.concat([existing_df, new_df], ignore_index=True)
-        print(f"Added {len(new_df)} new file(s) to existing {len(existing_df)} file(s)")
-    else:
-        df = new_df
-
-    df.to_parquet(parquet_file, index=False)
-    print(f"Saved {len(df)} total file(s) to {parquet_file} ({parquet_path.stat().st_size / 1024 / 1024:.1f} MB)")
-
-
-def parquet_to_folder(parquet_file: str, output_dir: str):
-    """Reconstruct folder structure from Parquet."""
-    df = pd.read_parquet(parquet_file)
-    out = Path(output_dir)
-
-    for _, row in df.iterrows():
-        lb = row["_leaderboard"]
-        dev = row["_developer"]
-        model = row["_model"]
-        uuid = row["_uuid"]
-
-        json_data = {
-            "schema_version": row["schema_version"],
-            "evaluation_id": row["evaluation_id"],
-            "retrieved_timestamp": row["retrieved_timestamp"],
-            "source_data": json.loads(row["source_data"]),
-            "evaluation_source": {
-                "evaluation_source_name": row["evaluation_source_name"],
-                "evaluation_source_type": row["evaluation_source_type"]
-            },
-            "source_metadata": {
-                "source_organization_name": row["source_organization_name"],
-                "evaluator_relationship": row["evaluator_relationship"]
-            },
-            "model_info": {
-                "name": row["model_name"],
-                "id": row["model_id"],
-                "developer": row["model_developer"]
-            },
-            "evaluation_results": json.loads(row["evaluation_results"])
-        }
-
-        if pd.notna(row["source_organization_url"]):
-            json_data["source_metadata"]["source_organization_url"] = row["source_organization_url"]
-        if pd.notna(row["source_organization_logo_url"]):
-            json_data["source_metadata"]["source_organization_logo_url"] = row["source_organization_logo_url"]
-
-        if pd.notna(row["model_inference_platform"]):
-            json_data["model_info"]["inference_platform"] = row["model_inference_platform"]
-
-        if pd.notna(row["additional_details"]):
-            json_data["additional_details"] = json.loads(row["additional_details"])
-
-        file_path = out / lb / dev / model / f"{uuid}.json"
-        file_path.parent.mkdir(parents=True, exist_ok=True)
-        with open(file_path, 'w') as f:
-            json.dump(json_data, f, indent=2)
-
-    print(f"Reconstructed {len(df)} files to {output_dir}")
-
-
-if __name__ == "__main__":
-    import sys
-
-    if len(sys.argv) < 2:
-        print("Usage:")
-        print("  python json_to_parquet.py add <json_or_folder> <output.parquet>")
-        print("  python json_to_parquet.py export <input.parquet> <output_dir>")
-        sys.exit(1)
-
-    cmd = sys.argv[1]
-
-    if cmd == "add":
-        add_to_parquet(sys.argv[2], sys.argv[3])
-    elif cmd == "export":
-        parquet_to_folder(sys.argv[2], sys.argv[3])
-    else:
-        print(f"Unknown command: {cmd}")
ui_components.py
CHANGED
|
@@ -1,211 +1,838 @@
|
|
| 1 |
"""
|
| 2 |
UI Components: Themes, CSS, and HTML formatters for the Gradio interface.
|
|
|
|
| 3 |
"""
|
| 4 |
import gradio as gr
|
| 5 |
|
| 6 |
|
| 7 |
def get_theme():
|
| 8 |
-
"""Returns the
|
| 9 |
-
return gr.themes.
|
| 10 |
-
primary_hue="
|
| 11 |
neutral_hue="slate",
|
| 12 |
-
font=[gr.themes.GoogleFont("
|
|
|
|
| 13 |
).set(
|
| 14 |
-
body_background_fill="
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
block_border_width="1px",
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
)
|
| 19 |
|
| 20 |
|
| 21 |
def get_custom_css():
|
| 22 |
-
"""Returns custom CSS
|
| 23 |
return """
|
| 24 |
-
/*
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
.gradio-container {
|
| 26 |
max-width: 100% !important;
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
}
|
| 29 |
|
| 30 |
-
/*
|
| 31 |
-
.
|
| 32 |
-
display:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
}
|
| 34 |
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
}
|
| 40 |
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
cursor: pointer;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
}
|
| 44 |
"""
|
| 45 |
|
| 46 |
|
| 47 |
def format_leaderboard_header(selected_leaderboard, metadata):
|
| 48 |
-
"""Formats the leaderboard header info
|
| 49 |
if not selected_leaderboard:
|
| 50 |
return """
|
| 51 |
-
<div style="text-align: center; padding:
|
| 52 |
-
<
|
| 53 |
-
<p>Select a leaderboard above to visualize results and metadata.</p>
|
| 54 |
</div>
|
| 55 |
"""
|
| 56 |
|
| 57 |
if not metadata or not metadata.get("evals"):
|
| 58 |
-
return f"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
source_info = metadata.get("source_info", {})
|
| 61 |
org = source_info.get("organization", "Unknown")
|
| 62 |
-
relationship = source_info.get("relationship", "Unknown").replace("_", " ").title()
|
| 63 |
url = source_info.get("url", "#")
|
| 64 |
eval_names = list(metadata["evals"].keys())
|
| 65 |
|
| 66 |
-
|
| 67 |
-
eval_badges = "".join([f"""
|
| 68 |
-
<span style="
|
| 69 |
-
display: inline-block;
|
| 70 |
-
padding: 0.25rem 0.75rem;
|
| 71 |
-
margin: 0.25rem 0.25rem 0.25rem 0;
|
| 72 |
-
background: var(--background-fill-primary);
|
| 73 |
-
border: 1px solid var(--border-color-primary);
|
| 74 |
-
border-radius: 16px;
|
| 75 |
-
font-size: 0.8rem;
|
| 76 |
-
color: var(--body-text-color);
|
| 77 |
-
font-weight: 500;
|
| 78 |
-
">{name}</span>
|
| 79 |
-
""" for name in eval_names])
|
| 80 |
|
| 81 |
return f"""
|
| 82 |
-
<div
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
<div style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 1rem;">
|
| 89 |
-
<div style="flex: 1;">
|
| 90 |
-
<h3 style="margin: 0 0 0.5rem 0; font-size: 1.2rem; font-weight: 600; color: var(--body-text-color);">
|
| 91 |
-
{selected_leaderboard}
|
| 92 |
-
</h3>
|
| 93 |
-
<div style="font-size: 0.9rem; color: var(--body-text-color-subdued); margin-bottom: 0.75rem;">
|
| 94 |
-
<span><strong>Source Organization:</strong> {org}</span> •
|
| 95 |
-
<span><strong>Evaluator Relationship:</strong> {relationship}</span>
|
| 96 |
-
</div>
|
| 97 |
-
<div style="margin-top: 0.75rem;">
|
| 98 |
-
<div style="font-size: 0.85rem; font-weight: 600; color: var(--body-text-color); margin-bottom: 0.5rem;">
|
| 99 |
-
Included Evaluations:
|
| 100 |
-
</div>
|
| 101 |
-
<div>{eval_badges}</div>
|
| 102 |
-
</div>
|
| 103 |
</div>
|
| 104 |
<a href="{url}" target="_blank" style="
|
| 105 |
-
font-size: 0.
|
| 106 |
-
color:
|
| 107 |
text-decoration: none;
|
| 108 |
-
padding: 0.
|
|
|
|
| 109 |
border-radius: 6px;
|
| 110 |
-
background: var(--background-fill-primary);
|
| 111 |
-
border: 1px solid var(--border-color-primary);
|
| 112 |
-
transition: all 0.2s;
|
| 113 |
white-space: nowrap;
|
| 114 |
-
">
|
| 115 |
-
Source
|
| 116 |
-
</a>
|
| 117 |
</div>
|
| 118 |
</div>
|
| 119 |
"""
|
| 120 |
|
| 121 |
|
| 122 |
def format_metric_details(selected_leaderboard, metadata):
|
| 123 |
-
"""Formats metric detail cards
|
| 124 |
if not selected_leaderboard or not metadata or not metadata.get("evals"):
|
| 125 |
return ""
|
| 126 |
|
| 127 |
evals = metadata.get("evals", {})
|
| 128 |
|
| 129 |
html = """
|
| 130 |
-
<
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
<div style="
|
| 135 |
-
display: grid;
|
| 136 |
-
grid-template-columns: repeat(auto-fill, minmax(350px, 1fr));
|
| 137 |
-
gap: 1rem;
|
| 138 |
-
">
|
| 139 |
"""
|
| 140 |
|
| 141 |
for eval_name, info in evals.items():
|
| 142 |
-
score_type = info['score_type'].upper() if info.get('score_type') else "
|
| 143 |
direction = "Lower is better" if info.get('lower_is_better') else "Higher is better"
|
| 144 |
-
|
| 145 |
|
| 146 |
-
|
| 147 |
if info.get('score_type') == "continuous" and info.get('min_score') is not None:
|
| 148 |
-
|
| 149 |
elif info.get('score_type') == "levels" and info.get('level_names'):
|
| 150 |
-
|
| 151 |
-
details_content += f"<div><span style='opacity: 0.7;'>Levels:</span> <strong>{levels}</strong></div>"
|
| 152 |
|
| 153 |
-
if info.get('has_unknown_level'):
|
| 154 |
-
details_content += "<div style='margin-top: 0.25rem; font-size: 0.8rem; opacity: 0.7;'>* -1 indicates Unknown</div>"
|
| 155 |
-
|
| 156 |
html += f"""
|
| 157 |
-
<details
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
overflow: hidden;
|
| 162 |
-
height: fit-content;
|
| 163 |
-
">
|
| 164 |
-
<summary style="
|
| 165 |
-
padding: 0.75rem 1rem;
|
| 166 |
-
cursor: pointer;
|
| 167 |
-
font-weight: 600;
|
| 168 |
-
display: flex;
|
| 169 |
-
align-items: center;
|
| 170 |
-
justify-content: space-between;
|
| 171 |
-
list-style: none;
|
| 172 |
-
font-size: 0.95rem;
|
| 173 |
-
">
|
| 174 |
-
<div style="display: flex; align-items: center; gap: 0.5rem;">
|
| 175 |
-
<span style="font-size: 1.1rem; opacity: 0.8;">🏷️</span>
|
| 176 |
-
<span style="white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{eval_name}</span>
|
| 177 |
-
</div>
|
| 178 |
-
<div style="display: flex; align-items: center; gap: 0.5rem;">
|
| 179 |
-
<span style="font-size: 0.8rem; font-weight: 400; color: var(--body-text-color-subdued); white-space: nowrap;">{direction_icon} {direction}</span>
|
| 180 |
-
</div>
|
| 181 |
</summary>
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
font-size: 0.9rem;
|
| 188 |
-
">
|
| 189 |
-
<p style="margin: 0 0 0.5rem 0; color: var(--body-text-color-subdued); line-height: 1.4;">
|
| 190 |
-
{info['description']}
|
| 191 |
-
</p>
|
| 192 |
-
<div style="display: flex; justify-content: space-between; align-items: flex-end; margin-top: 0.5rem;">
|
| 193 |
-
<div style="font-size: 0.85rem;">
|
| 194 |
-
{details_content}
|
| 195 |
-
</div>
|
| 196 |
-
<span style="
|
| 197 |
-
font-size: 0.7rem;
|
| 198 |
-
padding: 1px 6px;
|
| 199 |
-
border-radius: 4px;
|
| 200 |
-
background: var(--background-fill-primary);
|
| 201 |
-
border: 1px solid var(--border-color-primary);
|
| 202 |
-
color: var(--body-text-color-subdued);
|
| 203 |
-
">{score_type}</span>
|
| 204 |
</div>
|
| 205 |
</div>
|
| 206 |
</details>
|
| 207 |
"""
|
| 208 |
|
| 209 |
-
html += "</div>"
|
| 210 |
return html
|
| 211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
UI Components: Themes, CSS, and HTML formatters for the Gradio interface.
|
| 3 |
+
Nord color theme with balanced contrast.
|
| 4 |
"""
|
| 5 |
import gradio as gr
|
| 6 |
|
| 7 |
|
| 8 |
def get_theme():
|
| 9 |
+
"""Returns the Nord-themed Gradio theme."""
|
| 10 |
+
return gr.themes.Base(
|
| 11 |
+
primary_hue="blue",
|
| 12 |
neutral_hue="slate",
|
| 13 |
+
font=[gr.themes.GoogleFont("DM Sans"), "system-ui", "sans-serif"],
|
| 14 |
+
font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "monospace"],
|
| 15 |
).set(
|
| 16 |
+
body_background_fill="#2E3440",
|
| 17 |
+
body_background_fill_dark="#2E3440",
|
| 18 |
+
body_text_color="#ECEFF4",
|
| 19 |
+
body_text_color_dark="#ECEFF4",
|
| 20 |
+
body_text_color_subdued="#4C566A",
|
| 21 |
+
body_text_color_subdued_dark="#4C566A",
|
| 22 |
+
block_background_fill="#3B4252",
|
| 23 |
+
block_background_fill_dark="#3B4252",
|
| 24 |
block_border_width="1px",
|
| 25 |
+
block_border_color="#434C5E",
|
| 26 |
+
block_border_color_dark="#434C5E",
|
| 27 |
+
block_label_text_color="#D8DEE9",
|
| 28 |
+
block_label_text_color_dark="#D8DEE9",
|
| 29 |
+
block_title_text_color="#ECEFF4",
|
| 30 |
+
block_title_text_color_dark="#ECEFF4",
|
| 31 |
+
input_background_fill="#2E3440",
|
| 32 |
+
input_background_fill_dark="#2E3440",
|
| 33 |
+
input_border_color="#4C566A",
|
| 34 |
+
input_border_color_dark="#4C566A",
|
| 35 |
+
button_primary_background_fill="#88C0D0",
|
| 36 |
+
button_primary_background_fill_dark="#88C0D0",
|
| 37 |
+
button_primary_text_color="#2E3440",
|
| 38 |
+
button_primary_text_color_dark="#2E3440",
|
| 39 |
+
button_secondary_background_fill="#434C5E",
|
| 40 |
+
button_secondary_background_fill_dark="#434C5E",
|
| 41 |
+
button_secondary_text_color="#ECEFF4",
|
| 42 |
+
button_secondary_text_color_dark="#ECEFF4",
|
| 43 |
)
|
| 44 |
|
| 45 |
|
| 46 |
def get_custom_css():
|
| 47 |
+
"""Returns custom CSS with Nord colors."""
|
| 48 |
return """
|
| 49 |
+
/* === Nord Theme ===
|
| 50 |
+
Polar Night: #2E3440 (bg), #3B4252 (surface), #434C5E, #4C566A
|
| 51 |
+
Snow Storm: #D8DEE9, #E5E9F0, #ECEFF4
|
| 52 |
+
Frost: #8FBCBB, #88C0D0, #81A1C1, #5E81AC
|
| 53 |
+
Aurora: #BF616A, #D08770, #EBCB8B, #A3BE8C, #B48EAD
|
| 54 |
+
*/
|
| 55 |
+
|
| 56 |
+
/* === Base === */
|
| 57 |
.gradio-container {
|
| 58 |
max-width: 100% !important;
|
| 59 |
+
margin: 0 !important;
|
| 60 |
+
padding: 1.25rem 2.5rem 2rem !important;
|
| 61 |
+
background: #2E3440 !important;
|
| 62 |
+
color: #ECEFF4 !important;
|
| 63 |
+
font-family: 'DM Sans', system-ui, sans-serif !important;
|
| 64 |
+
font-size: 16px !important;
|
| 65 |
}
|
| 66 |
|
| 67 |
+
/* === Header === */
|
| 68 |
+
.app-header {
|
| 69 |
+
display: flex;
|
| 70 |
+
align-items: center;
|
| 71 |
+
gap: 1rem;
|
| 72 |
+
margin-bottom: 1.5rem;
|
| 73 |
+
padding: 1.25rem 1.5rem;
|
| 74 |
+
background: #3B4252;
|
| 75 |
+
border: 1px solid #434C5E;
|
| 76 |
+
border-radius: 12px;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
.app-header .logo-mark {
|
| 80 |
+
width: 48px;
|
| 81 |
+
height: 48px;
|
| 82 |
+
background: linear-gradient(135deg, #88C0D0 0%, #81A1C1 100%);
|
| 83 |
+
border-radius: 12px;
|
| 84 |
+
display: flex;
|
| 85 |
+
align-items: center;
|
| 86 |
+
justify-content: center;
|
| 87 |
+
font-weight: 800;
|
| 88 |
+
font-size: 1.1rem;
|
| 89 |
+
color: #2E3440;
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
.app-header .brand {
|
| 93 |
+
display: flex;
|
| 94 |
+
flex-direction: column;
|
| 95 |
+
gap: 0.125rem;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
.app-header h1 {
|
| 99 |
+
margin: 0;
|
| 100 |
+
font-size: 1.5rem;
|
| 101 |
+
font-weight: 700;
|
| 102 |
+
color: #ECEFF4;
|
| 103 |
+
letter-spacing: -0.02em;
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
.app-header .tagline {
|
| 107 |
+
color: #D8DEE9;
|
| 108 |
+
font-size: 0.85rem;
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
.app-header .header-right {
|
| 112 |
+
margin-left: auto;
|
| 113 |
+
display: flex;
|
| 114 |
+
align-items: center;
|
| 115 |
+
gap: 0.75rem;
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
.app-header .version-badge {
|
| 119 |
+
background: rgba(136, 192, 208, 0.2);
|
| 120 |
+
border: 1px solid rgba(136, 192, 208, 0.4);
|
| 121 |
+
border-radius: 6px;
|
| 122 |
+
padding: 0.25rem 0.625rem;
|
| 123 |
+
font-size: 0.7rem;
|
| 124 |
+
font-family: 'JetBrains Mono', monospace;
|
| 125 |
+
color: #88C0D0;
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
/* === Tabs === */
|
| 129 |
+
.tabs {
|
| 130 |
+
border: none !important;
|
| 131 |
+
background: transparent !important;
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
.tab-nav {
|
| 135 |
+
background: #3B4252 !important;
|
| 136 |
+
border: 1px solid #434C5E !important;
|
| 137 |
+
border-radius: 10px !important;
|
| 138 |
+
padding: 0.25rem !important;
|
| 139 |
+
gap: 0.25rem !important;
|
| 140 |
+
margin-bottom: 1.25rem !important;
|
| 141 |
+
display: inline-flex !important;
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
.tab-nav button {
|
| 145 |
+
background: transparent !important;
|
| 146 |
+
border: none !important;
|
| 147 |
+
color: #D8DEE9 !important;
|
| 148 |
+
padding: 0.75rem 1.5rem !important;
|
| 149 |
+
font-size: 0.95rem !important;
|
| 150 |
+
font-weight: 500 !important;
|
| 151 |
+
border-radius: 8px !important;
|
| 152 |
+
transition: all 0.15s ease !important;
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
.tab-nav button.selected {
|
| 156 |
+
color: #2E3440 !important;
|
| 157 |
+
background: #88C0D0 !important;
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
.tab-nav button:hover:not(.selected) {
|
| 161 |
+
background: #434C5E !important;
|
| 162 |
+
color: #ECEFF4 !important;
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
.tabitem {
|
| 166 |
+
background: transparent !important;
|
| 167 |
+
border: none !important;
|
| 168 |
+
padding: 0 !important;
|
| 169 |
+
}
|
| 170 |
+
|
| 171 |
+
/* === Controls bar === */
|
| 172 |
+
.controls-bar {
|
| 173 |
+
background: #3B4252 !important;
|
| 174 |
+
border: 1px solid #434C5E !important;
|
| 175 |
+
border-radius: 10px !important;
|
| 176 |
+
padding: 0.75rem 1.25rem !important;
|
| 177 |
+
margin-bottom: 1rem !important;
|
| 178 |
+
gap: 0.75rem !important;
|
| 179 |
+
}
|
| 180 |
+
|
| 181 |
+
.controls-bar label {
|
| 182 |
+
font-size: 0.75rem !important;
|
| 183 |
+
text-transform: uppercase !important;
|
| 184 |
+
letter-spacing: 0.04em !important;
|
| 185 |
+
color: #D8DEE9 !important;
|
| 186 |
+
font-weight: 500 !important;
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
/* === Info banner === */
|
| 190 |
+
.info-banner {
|
| 191 |
+
background: #3B4252 !important;
|
| 192 |
+
border: 1px solid #434C5E !important;
|
| 193 |
+
border-left: 3px solid #88C0D0 !important;
|
| 194 |
+
border-radius: 0 10px 10px 0 !important;
|
| 195 |
+
padding: 0.75rem 1rem !important;
|
| 196 |
+
margin-bottom: 1rem !important;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
.info-banner h3 {
|
| 200 |
+
margin: 0;
|
| 201 |
+
font-size: 1.1rem;
|
| 202 |
+
font-weight: 600;
|
| 203 |
+
color: #ECEFF4;
|
| 204 |
+
}
|
| 205 |
+
|
| 206 |
+
.info-banner .eval-tags {
|
| 207 |
+
display: flex;
|
| 208 |
+
flex-wrap: wrap;
|
| 209 |
+
gap: 0.375rem;
|
| 210 |
+
}
|
| 211 |
+
|
| 212 |
+
.info-banner .eval-tag {
|
| 213 |
+
background: rgba(143, 188, 187, 0.15);
|
| 214 |
+
border: 1px solid rgba(143, 188, 187, 0.3);
|
| 215 |
+
border-radius: 4px;
|
| 216 |
+
padding: 0.3rem 0.6rem;
|
| 217 |
+
font-size: 0.8rem;
|
| 218 |
+
font-family: 'JetBrains Mono', monospace;
|
| 219 |
+
color: #8FBCBB;
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
/* === Dataframe - seamless styling === */
|
| 223 |
+
.dataframe,
|
| 224 |
+
.dataframe > div,
|
| 225 |
+
.dataframe > div > div,
|
| 226 |
+
.dataframe .table-wrap,
|
| 227 |
+
.dataframe .svelte-1gfkn6j {
|
| 228 |
+
background: #2E3440 !important;
|
| 229 |
+
border: none !important;
|
| 230 |
+
box-shadow: none !important;
|
| 231 |
+
border-radius: 0 !important;
|
| 232 |
+
}
|
| 233 |
+
|
| 234 |
+
.dataframe table {
|
| 235 |
+
width: 100% !important;
|
| 236 |
+
border-collapse: collapse !important;
|
| 237 |
+
font-size: 0.95rem !important;
|
| 238 |
+
table-layout: fixed !important;
|
| 239 |
+
background: #2E3440 !important;
|
| 240 |
+
}
|
| 241 |
+
|
| 242 |
+
.dataframe thead th:nth-child(1) { width: 28%; }
|
| 243 |
+
.dataframe thead th:nth-child(2) { width: 12%; }
|
| 244 |
+
.dataframe thead th:nth-child(3) { width: 7%; }
|
| 245 |
+
.dataframe thead th:nth-child(4) { width: 7%; }
|
| 246 |
+
.dataframe thead th:nth-child(n+5) { width: auto; }
|
| 247 |
+
|
| 248 |
+
.dataframe thead,
|
| 249 |
+
.dataframe thead tr {
|
| 250 |
+
background: #2E3440 !important;
|
| 251 |
+
position: sticky;
|
| 252 |
+
top: 0;
|
| 253 |
+
z-index: 10;
|
| 254 |
+
}
|
| 255 |
+
|
| 256 |
+
.dataframe thead th {
|
| 257 |
+
padding: 0.875rem 1rem !important;
|
| 258 |
+
font-weight: 600 !important;
|
| 259 |
+
font-size: 0.75rem !important;
|
| 260 |
+
text-transform: uppercase !important;
|
| 261 |
+
letter-spacing: 0.05em !important;
|
| 262 |
+
color: #81A1C1 !important;
|
| 263 |
+
border-bottom: 1px solid #434C5E !important;
|
| 264 |
+
border-top: none !important;
|
| 265 |
+
text-align: left !important;
|
| 266 |
+
background: #2E3440 !important;
|
| 267 |
+
}
|
| 268 |
+
|
| 269 |
+
.dataframe tbody,
|
| 270 |
+
.dataframe tbody tr {
|
| 271 |
+
background: #2E3440 !important;
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
.dataframe tbody tr {
|
| 275 |
+
border-bottom: 1px solid #3B4252 !important;
|
| 276 |
+
}
|
| 277 |
+
|
| 278 |
+
.dataframe tbody tr:hover {
|
| 279 |
+
background: rgba(136, 192, 208, 0.04) !important;
|
| 280 |
+
}
|
| 281 |
+
|
| 282 |
+
.dataframe tbody td {
|
| 283 |
+
padding: 0.75rem 1rem !important;
|
| 284 |
+
color: #E5E9F0 !important;
|
| 285 |
+
background: #2E3440 !important;
|
| 286 |
+
overflow: hidden !important;
|
| 287 |
+
text-overflow: ellipsis !important;
|
| 288 |
+
border: none !important;
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
+
/* === Pagination bar === */
|
| 292 |
+
.pagination-bar {
|
| 293 |
+
margin-top: 1rem !important;
|
| 294 |
+
padding: 1rem 0 !important;
|
| 295 |
+
border-top: 1px solid #3B4252 !important;
|
| 296 |
+
display: flex !important;
|
| 297 |
+
justify-content: center !important;
|
| 298 |
+
align-items: center !important;
|
| 299 |
+
gap: 1rem !important;
|
| 300 |
+
}
|
| 301 |
+
|
| 302 |
+
.page-info {
|
| 303 |
+
font-family: 'JetBrains Mono', monospace !important;
|
| 304 |
+
font-size: 1rem !important;
|
| 305 |
+
color: #D8DEE9 !important;
|
| 306 |
+
min-width: 80px !important;
|
| 307 |
+
text-align: center !important;
|
| 308 |
+
}
|
| 309 |
+
|
| 310 |
+
/* Model name - white, readable */
|
| 311 |
+
.dataframe tbody td:first-child {
|
| 312 |
+
font-weight: 500 !important;
|
| 313 |
+
color: #ECEFF4 !important;
|
| 314 |
+
white-space: nowrap !important;
|
| 315 |
+
}
|
| 316 |
+
|
| 317 |
+
/* Developer - frost blue */
|
| 318 |
+
.dataframe tbody td:nth-child(2) {
|
| 319 |
+
color: #88C0D0 !important;
|
| 320 |
+
white-space: nowrap !important;
|
| 321 |
+
}
|
| 322 |
+
|
| 323 |
+
/* Params - aurora orange */
|
| 324 |
+
.dataframe tbody td:nth-child(3) {
|
| 325 |
+
font-family: 'JetBrains Mono', monospace !important;
|
| 326 |
+
color: #D08770 !important;
|
| 327 |
+
text-align: right !important;
|
| 328 |
+
}
|
| 329 |
+
|
| 330 |
+
/* Average - aurora green */
|
| 331 |
+
.dataframe tbody td:nth-child(4) {
|
| 332 |
+
font-family: 'JetBrains Mono', monospace !important;
|
| 333 |
+
font-weight: 600 !important;
|
| 334 |
+
color: #A3BE8C !important;
|
| 335 |
+
text-align: right !important;
|
| 336 |
+
}
|
| 337 |
+
|
| 338 |
+
/* Metrics - frost teal */
|
| 339 |
+
.dataframe tbody td:nth-child(n+5) {
|
| 340 |
+
font-family: 'JetBrains Mono', monospace !important;
|
| 341 |
+
text-align: right !important;
|
| 342 |
+
color: #8FBCBB !important;
|
| 343 |
+
white-space: nowrap !important;
|
| 344 |
+
}
|
| 345 |
+
|
| 346 |
+
/* === Status text === */
|
| 347 |
+
.status-text {
|
| 348 |
+
font-size: 0.9rem !important;
|
| 349 |
+
color: #D8DEE9 !important;
|
| 350 |
+
padding: 0.5rem 0 !important;
|
| 351 |
+
font-family: 'JetBrains Mono', monospace !important;
|
| 352 |
+
}
|
| 353 |
+
|
| 354 |
+
/* === Model Card === */
|
| 355 |
+
.model-card-container {
|
| 356 |
+
display: flex;
|
| 357 |
+
flex-direction: column;
|
| 358 |
+
gap: 1.25rem;
|
| 359 |
+
}
|
| 360 |
+
|
| 361 |
+
.model-card-header {
|
| 362 |
+
background: #3B4252;
|
| 363 |
+
border: 1px solid #434C5E;
|
| 364 |
+
border-radius: 12px;
|
| 365 |
+
padding: 1.5rem 2rem;
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
.model-card-header h2 {
|
| 369 |
+
margin: 0 0 0.5rem 0;
|
| 370 |
+
font-size: 1.5rem;
|
| 371 |
+
font-weight: 600;
|
| 372 |
+
color: #ECEFF4;
|
| 373 |
+
}
|
| 374 |
+
|
| 375 |
+
.model-card-header .model-meta {
|
| 376 |
+
display: flex;
|
| 377 |
+
gap: 1.5rem;
|
| 378 |
+
color: #D8DEE9;
|
| 379 |
+
font-size: 0.95rem;
|
| 380 |
+
}
|
| 381 |
+
|
| 382 |
+
.model-card-header .model-meta strong {
|
| 383 |
+
color: #8FBCBB;
|
| 384 |
+
}
|
| 385 |
+
|
| 386 |
+
.leaderboard-section {
|
| 387 |
+
background: #3B4252;
|
| 388 |
+
border: 1px solid #434C5E;
|
| 389 |
+
border-radius: 10px;
|
| 390 |
+
overflow: hidden;
|
| 391 |
+
}
|
| 392 |
+
|
| 393 |
+
.leaderboard-section-header {
|
| 394 |
+
background: #434C5E;
|
| 395 |
+
padding: 1rem 1.25rem;
|
| 396 |
+
border-bottom: 1px solid #4C566A;
|
| 397 |
+
display: flex;
|
| 398 |
+
justify-content: space-between;
|
| 399 |
+
align-items: center;
|
| 400 |
+
}
|
| 401 |
+
|
| 402 |
+
.leaderboard-section-header h3 {
|
| 403 |
+
margin: 0;
|
| 404 |
+
font-size: 1rem;
|
| 405 |
+
font-weight: 600;
|
| 406 |
+
color: #88C0D0;
|
| 407 |
+
}
|
| 408 |
+
|
| 409 |
+
.leaderboard-section-header .lb-avg {
|
| 410 |
+
background: rgba(163, 190, 140, 0.15);
|
| 411 |
+
border: 1px solid rgba(163, 190, 140, 0.3);
|
| 412 |
+
border-radius: 8px;
|
| 413 |
+
padding: 0.5rem 1rem;
|
| 414 |
+
font-size: 0.85rem;
|
| 415 |
+
color: #D8DEE9;
|
| 416 |
+
}
|
| 417 |
+
|
| 418 |
+
.leaderboard-section-header .lb-avg strong {
|
| 419 |
+
color: #A3BE8C;
|
| 420 |
+
font-family: 'JetBrains Mono', monospace;
|
| 421 |
+
font-size: 1.1rem;
|
| 422 |
+
font-weight: 700;
|
| 423 |
+
}
|
| 424 |
+
|
| 425 |
+
.scores-grid {
|
| 426 |
+
display: grid;
|
| 427 |
+
grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
|
| 428 |
+
gap: 1px;
|
| 429 |
+
background: #434C5E;
|
| 430 |
+
}
|
| 431 |
+
|
| 432 |
+
.score-item {
|
| 433 |
+
background: #3B4252;
|
| 434 |
+
padding: 1rem 1.25rem;
|
| 435 |
+
}
|
| 436 |
+
|
| 437 |
+
.score-item .score-label {
|
| 438 |
+
font-size: 0.8rem;
|
| 439 |
+
text-transform: uppercase;
|
| 440 |
+
letter-spacing: 0.05em;
|
| 441 |
+
color: #D8DEE9;
|
| 442 |
+
margin-bottom: 0.375rem;
|
| 443 |
+
}
|
| 444 |
+
|
| 445 |
+
.score-item .score-value {
|
| 446 |
+
font-size: 1.5rem;
|
| 447 |
+
font-weight: 600;
|
| 448 |
+
font-family: 'JetBrains Mono', monospace;
|
| 449 |
+
color: #A3BE8C;
|
| 450 |
+
}
|
| 451 |
+
|
| 452 |
+
.score-item.highlight .score-value {
|
| 453 |
+
color: #88C0D0;
|
| 454 |
}
|
| 455 |
|
| 456 |
+
.no-results {
|
| 457 |
+
text-align: center;
|
| 458 |
+
padding: 3rem 1rem;
|
| 459 |
+
color: #D8DEE9;
|
| 460 |
}
|
| 461 |
|
| 462 |
+
.no-results h3 {
|
| 463 |
+
color: #ECEFF4;
|
| 464 |
+
margin-bottom: 0.5rem;
|
| 465 |
+
}
|
| 466 |
+
|
| 467 |
+
/* === Buttons === */
|
| 468 |
+
button {
|
| 469 |
+
border-radius: 8px !important;
|
| 470 |
+
font-weight: 500 !important;
|
| 471 |
+
font-size: 0.95rem !important;
|
| 472 |
+
transition: all 0.15s ease !important;
|
| 473 |
+
}
|
| 474 |
+
|
| 475 |
+
button.primary {
|
| 476 |
+
background: #88C0D0 !important;
|
| 477 |
+
color: #2E3440 !important;
|
| 478 |
+
border: none !important;
|
| 479 |
+
}
|
| 480 |
+
|
| 481 |
+
button.primary:hover:not(:disabled) {
|
| 482 |
+
background: #8FBCBB !important;
|
| 483 |
+
}
|
| 484 |
+
|
| 485 |
+
button.secondary,
|
| 486 |
+
button[variant="secondary"] {
|
| 487 |
+
background: #434C5E !important;
|
| 488 |
+
color: #ECEFF4 !important;
|
| 489 |
+
border: 1px solid #4C566A !important;
|
| 490 |
+
}
|
| 491 |
+
|
| 492 |
+
button.secondary:hover:not(:disabled),
|
| 493 |
+
button[variant="secondary"]:hover:not(:disabled) {
|
| 494 |
+
background: #4C566A !important;
|
| 495 |
+
}
|
| 496 |
+
|
| 497 |
+
button:disabled {
|
| 498 |
+
opacity: 0.35 !important;
|
| 499 |
+
}
|
| 500 |
+
|
| 501 |
+
/* === Inputs === */
|
| 502 |
+
input[type="text"],
|
| 503 |
+
select {
|
| 504 |
+
background: #2E3440 !important;
|
| 505 |
+
border: 1px solid #4C566A !important;
|
| 506 |
+
border-radius: 8px !important;
|
| 507 |
+
color: #ECEFF4 !important;
|
| 508 |
+
font-size: 1rem !important;
|
| 509 |
+
}
|
| 510 |
+
|
| 511 |
+
input[type="text"]:focus,
|
| 512 |
+
select:focus {
|
| 513 |
+
border-color: #88C0D0 !important;
|
| 514 |
+
box-shadow: 0 0 0 3px rgba(136, 192, 208, 0.15) !important;
|
| 515 |
+
outline: none !important;
|
| 516 |
+
}
|
| 517 |
+
|
| 518 |
+
input::placeholder {
|
| 519 |
+
color: #4C566A !important;
|
| 520 |
+
}
|
| 521 |
+
|
| 522 |
+
/* === Accordion === */
|
| 523 |
+
.accordion {
|
| 524 |
+
background: #3B4252 !important;
|
| 525 |
+
border: 1px solid #434C5E !important;
|
| 526 |
+
border-radius: 10px !important;
|
| 527 |
+
margin-top: 1.5rem !important;
|
| 528 |
+
}
|
| 529 |
+
|
| 530 |
+
.accordion > .label-wrap {
|
| 531 |
+
background: transparent !important;
|
| 532 |
+
padding: 1rem 1.25rem !important;
|
| 533 |
+
color: #D8DEE9 !important;
|
| 534 |
+
font-size: 0.95rem !important;
|
| 535 |
+
}
|
| 536 |
+
|
| 537 |
+
.accordion > .wrap {
|
| 538 |
+
padding: 0.5rem 1.25rem 1.25rem !important;
|
| 539 |
+
color: #D8DEE9 !important;
|
| 540 |
+
font-size: 0.95rem !important;
|
| 541 |
+
line-height: 1.6 !important;
|
| 542 |
+
}
|
| 543 |
+
|
| 544 |
+
.accordion code {
|
| 545 |
+
background: #434C5E !important;
|
| 546 |
+
padding: 0.125rem 0.375rem !important;
|
| 547 |
+
border-radius: 4px !important;
|
| 548 |
+
font-family: 'JetBrains Mono', monospace !important;
|
| 549 |
+
font-size: 0.8rem !important;
|
| 550 |
+
color: #8FBCBB !important;
|
| 551 |
+
}
|
| 552 |
+
|
| 553 |
+
/* === Metrics section === */
|
| 554 |
+
.metrics-section {
|
| 555 |
+
margin-top: 1.5rem;
|
| 556 |
+
padding-top: 1.5rem;
|
| 557 |
+
border-top: 1px solid #434C5E;
|
| 558 |
+
}
|
| 559 |
+
|
| 560 |
+
.metrics-section h3 {
|
| 561 |
+
font-size: 0.85rem;
|
| 562 |
+
font-weight: 600;
|
| 563 |
+
color: #D8DEE9;
|
| 564 |
+
margin: 0 0 1rem 0;
|
| 565 |
+
text-transform: uppercase;
|
| 566 |
+
letter-spacing: 0.05em;
|
| 567 |
+
}
|
| 568 |
+
|
| 569 |
+
.metrics-grid {
|
| 570 |
+
display: grid;
|
| 571 |
+
grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
|
| 572 |
+
gap: 0.75rem;
|
| 573 |
+
}
|
| 574 |
+
|
| 575 |
+
.metric-card {
|
| 576 |
+
background: #3B4252;
|
| 577 |
+
border: 1px solid #434C5E;
|
| 578 |
+
border-radius: 8px;
|
| 579 |
+
overflow: hidden;
|
| 580 |
+
}
|
| 581 |
+
|
| 582 |
+
.metric-card-header {
|
| 583 |
+
display: flex;
|
| 584 |
+
justify-content: space-between;
|
| 585 |
+
align-items: center;
|
| 586 |
+
padding: 0.75rem 1rem;
|
| 587 |
cursor: pointer;
|
| 588 |
+
list-style: none;
|
| 589 |
+
}
|
| 590 |
+
|
| 591 |
+
.metric-card-header::-webkit-details-marker {
|
| 592 |
+
display: none;
|
| 593 |
+
}
|
| 594 |
+
|
| 595 |
+
.metric-card-name {
|
| 596 |
+
font-weight: 500;
|
| 597 |
+
font-size: 0.95rem;
|
| 598 |
+
color: #ECEFF4;
|
| 599 |
+
}
|
| 600 |
+
|
| 601 |
+
.metric-card-direction {
|
| 602 |
+
font-size: 0.8rem;
|
| 603 |
+
color: #D8DEE9;
|
| 604 |
+
}
|
| 605 |
+
|
| 606 |
+
.metric-card-direction .arrow {
|
| 607 |
+
color: #A3BE8C;
|
| 608 |
+
font-weight: 600;
|
| 609 |
+
}
|
| 610 |
+
|
| 611 |
+
.metric-card-body {
|
| 612 |
+
padding: 0.875rem 1.25rem;
|
| 613 |
+
border-top: 1px solid #434C5E;
|
| 614 |
+
font-size: 0.9rem;
|
| 615 |
+
color: #D8DEE9;
|
| 616 |
+
line-height: 1.5;
|
| 617 |
+
}
|
| 618 |
+
|
| 619 |
+
.metric-type-badge {
|
| 620 |
+
font-size: 0.65rem;
|
| 621 |
+
text-transform: uppercase;
|
| 622 |
+
letter-spacing: 0.05em;
|
| 623 |
+
padding: 0.15rem 0.4rem;
|
| 624 |
+
background: rgba(180, 142, 173, 0.2);
|
| 625 |
+
border: 1px solid rgba(180, 142, 173, 0.35);
|
| 626 |
+
border-radius: 4px;
|
| 627 |
+
color: #B48EAD;
|
| 628 |
+
font-family: 'JetBrains Mono', monospace;
|
| 629 |
+
}
|
| 630 |
+
|
| 631 |
+
/* === Scrollbar === */
|
| 632 |
+
::-webkit-scrollbar {
|
| 633 |
+
width: 8px;
|
| 634 |
+
height: 8px;
|
| 635 |
+
}
|
| 636 |
+
|
| 637 |
+
::-webkit-scrollbar-track {
|
| 638 |
+
background: #2E3440;
|
| 639 |
+
}
|
| 640 |
+
|
| 641 |
+
::-webkit-scrollbar-thumb {
|
| 642 |
+
background: #4C566A;
|
| 643 |
+
border-radius: 4px;
|
| 644 |
+
}
|
| 645 |
+
|
| 646 |
+
::-webkit-scrollbar-thumb:hover {
|
| 647 |
+
background: #5E81AC;
|
| 648 |
+
}
|
| 649 |
+
|
| 650 |
+
/* === Responsive === */
|
| 651 |
+
@media (max-width: 768px) {
|
| 652 |
+
.gradio-container {
|
| 653 |
+
padding: 1rem !important;
|
| 654 |
+
}
|
| 655 |
+
|
| 656 |
+
.scores-grid {
|
| 657 |
+
grid-template-columns: repeat(2, 1fr);
|
| 658 |
+
}
|
| 659 |
+
}
|
| 660 |
+
|
| 661 |
+
/* === Overrides === */
|
| 662 |
+
.gradio-container footer {
|
| 663 |
+
display: none !important;
|
| 664 |
+
}
|
| 665 |
+
|
| 666 |
+
.block {
|
| 667 |
+
background: #3B4252 !important;
|
| 668 |
+
}
|
| 669 |
+
|
| 670 |
+
.gradio-radio label {
|
| 671 |
+
background: #434C5E !important;
|
| 672 |
+
border: 1px solid #4C566A !important;
|
| 673 |
+
color: #ECEFF4 !important;
|
| 674 |
+
border-radius: 8px !important;
|
| 675 |
+
font-size: 0.85rem !important;
|
| 676 |
+
}
|
| 677 |
+
|
| 678 |
+
.gradio-radio label.selected {
|
| 679 |
+
background: #88C0D0 !important;
|
| 680 |
+
border-color: #88C0D0 !important;
|
| 681 |
+
color: #2E3440 !important;
|
| 682 |
}
|
| 683 |
"""
|
| 684 |
|
| 685 |
|
| 686 |
def format_leaderboard_header(selected_leaderboard, metadata):
|
| 687 |
+
"""Formats the leaderboard header info section."""
|
| 688 |
if not selected_leaderboard:
|
| 689 |
return """
|
| 690 |
+
<div style="text-align: center; padding: 2rem 1rem; color: #D8DEE9;">
|
| 691 |
+
<div style="font-size: 1.1rem;">Select a leaderboard to explore</div>
|
|
|
|
| 692 |
</div>
|
| 693 |
"""
|
| 694 |
|
| 695 |
if not metadata or not metadata.get("evals"):
|
| 696 |
+
return f"""
|
| 697 |
+
<div class="info-banner">
|
| 698 |
+
<h3>{selected_leaderboard}</h3>
|
| 699 |
+
</div>
|
| 700 |
+
"""
|
| 701 |
|
| 702 |
source_info = metadata.get("source_info", {})
|
| 703 |
org = source_info.get("organization", "Unknown")
|
|
|
|
| 704 |
url = source_info.get("url", "#")
|
| 705 |
eval_names = list(metadata["evals"].keys())
|
| 706 |
|
| 707 |
+
eval_tags = "".join([f'<span class="eval-tag">{name}</span>' for name in eval_names])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 708 |
|
| 709 |
return f"""
|
| 710 |
+
<div class="info-banner">
|
| 711 |
+
<div style="display: flex; justify-content: space-between; align-items: center; gap: 1rem;">
|
| 712 |
+
<div style="display: flex; align-items: center; gap: 1rem; flex-wrap: wrap;">
|
| 713 |
+
<h3 style="margin: 0;">{selected_leaderboard}</h3>
|
| 714 |
+
<span style="color: #D8DEE9; font-size: 0.8rem;">by {org}</span>
|
| 715 |
+
<div class="eval-tags" style="margin: 0;">{eval_tags}</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 716 |
</div>
|
| 717 |
<a href="{url}" target="_blank" style="
|
| 718 |
+
font-size: 0.75rem;
|
| 719 |
+
color: #88C0D0;
|
| 720 |
text-decoration: none;
|
| 721 |
+
padding: 0.375rem 0.75rem;
|
| 722 |
+
border: 1px solid rgba(136, 192, 208, 0.4);
|
| 723 |
border-radius: 6px;
|
|
|
|
|
|
|
|
|
|
| 724 |
white-space: nowrap;
|
| 725 |
+
">Source →</a>
|
|
|
|
|
|
|
| 726 |
</div>
|
| 727 |
</div>
|
| 728 |
"""
|
| 729 |
|
| 730 |
|
| 731 |
def format_metric_details(selected_leaderboard, metadata):
|
| 732 |
+
"""Formats metric detail cards."""
|
| 733 |
if not selected_leaderboard or not metadata or not metadata.get("evals"):
|
| 734 |
return ""
|
| 735 |
|
| 736 |
evals = metadata.get("evals", {})
|
| 737 |
|
| 738 |
html = """
|
| 739 |
+
<div class="metrics-section">
|
| 740 |
+
<h3>Metric Reference</h3>
|
| 741 |
+
<div class="metrics-grid">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 742 |
"""
|
| 743 |
|
| 744 |
for eval_name, info in evals.items():
|
| 745 |
+
score_type = info['score_type'].upper() if info.get('score_type') else "—"
|
| 746 |
direction = "Lower is better" if info.get('lower_is_better') else "Higher is better"
|
| 747 |
+
arrow = "↓" if info.get('lower_is_better') else "↑"
|
| 748 |
|
| 749 |
+
details = ""
|
| 750 |
if info.get('score_type') == "continuous" and info.get('min_score') is not None:
|
| 751 |
+
details = f"Range: [{info['min_score']} – {info['max_score']}]"
|
| 752 |
elif info.get('score_type') == "levels" and info.get('level_names'):
|
| 753 |
+
details = f"Levels: {', '.join(str(l) for l in info['level_names'])}"
|
|
|
|
| 754 |
|
|
|
|
|
|
|
|
|
|
| 755 |
html += f"""
|
| 756 |
+
<details class="metric-card">
|
| 757 |
+
<summary class="metric-card-header">
|
| 758 |
+
<span class="metric-card-name">{eval_name}</span>
|
| 759 |
+
<span class="metric-card-direction"><span class="arrow">{arrow}</span> {direction}</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 760 |
</summary>
|
| 761 |
+
<div class="metric-card-body">
|
| 762 |
+
<div>{info.get('description', 'No description')}</div>
|
| 763 |
+
<div style="display: flex; justify-content: space-between; align-items: center; margin-top: 0.5rem;">
|
| 764 |
+
<span style="font-size: 0.75rem; color: #D8DEE9;">{details}</span>
|
| 765 |
+
<span class="metric-type-badge">{score_type}</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 766 |
</div>
|
| 767 |
</div>
|
| 768 |
</details>
|
| 769 |
"""
|
| 770 |
|
| 771 |
+
html += "</div></div>"
|
| 772 |
return html
|
| 773 |
|
| 774 |
+
|
| 775 |
+
def format_model_card(model_name, model_data):
|
| 776 |
+
"""Formats a model card showing all evals across leaderboards."""
|
| 777 |
+
if not model_data:
|
| 778 |
+
return """
|
| 779 |
+
<div class="no-results">
|
| 780 |
+
<h3>No results found</h3>
|
| 781 |
+
<p>Try searching for a different model name</p>
|
| 782 |
+
</div>
|
| 783 |
+
"""
|
| 784 |
+
|
| 785 |
+
first = list(model_data.values())[0]
|
| 786 |
+
developer = first.get("developer", "Unknown")
|
| 787 |
+
params = first.get("params")
|
| 788 |
+
arch = first.get("architecture", "Unknown")
|
| 789 |
+
|
| 790 |
+
params_str = f"{params}B" if params else "—"
|
| 791 |
+
|
| 792 |
+
html = f"""
|
| 793 |
+
<div class="model-card-container">
|
| 794 |
+
<div class="model-card-header">
|
| 795 |
+
<h2>{model_name}</h2>
|
| 796 |
+
<div class="model-meta">
|
| 797 |
+
<span><strong>Developer:</strong> {developer}</span>
|
| 798 |
+
<span><strong>Parameters:</strong> {params_str}</span>
|
| 799 |
+
<span><strong>Architecture:</strong> {arch}</span>
|
| 800 |
+
</div>
|
| 801 |
+
</div>
|
| 802 |
+
"""
|
| 803 |
+
|
| 804 |
+
for leaderboard_name, data in model_data.items():
|
| 805 |
+
results = data.get("results", {})
|
| 806 |
+
if not results:
|
| 807 |
+
continue
|
| 808 |
+
|
| 809 |
+
scores = [v for v in results.values() if v is not None]
|
| 810 |
+
avg = sum(scores) / len(scores) if scores else None
|
| 811 |
+
avg_str = f"{avg:.3f}" if avg else "—"
|
| 812 |
+
|
| 813 |
+
html += f"""
|
| 814 |
+
<div class="leaderboard-section">
|
| 815 |
+
<div class="leaderboard-section-header">
|
| 816 |
+
<h3>{leaderboard_name}</h3>
|
| 817 |
+
<span class="lb-avg">Avg: <strong>{avg_str}</strong></span>
|
| 818 |
+
</div>
|
| 819 |
+
<div class="scores-grid">
|
| 820 |
+
"""
|
| 821 |
+
|
| 822 |
+
sorted_results = sorted(results.items(), key=lambda x: x[1] if x[1] is not None else 0, reverse=True)
|
| 823 |
+
|
| 824 |
+
for i, (metric_name, score) in enumerate(sorted_results):
|
| 825 |
+
score_display = f"{score:.3f}" if score is not None else "—"
|
| 826 |
+
highlight_class = "highlight" if i == 0 else ""
|
| 827 |
+
|
| 828 |
+
html += f"""
|
| 829 |
+
<div class="score-item {highlight_class}">
|
| 830 |
+
<div class="score-label">{metric_name}</div>
|
| 831 |
+
<div class="score-value">{score_display}</div>
|
| 832 |
+
</div>
|
| 833 |
+
"""
|
| 834 |
+
|
| 835 |
+
html += "</div></div>"
|
| 836 |
+
|
| 837 |
+
html += "</div>"
|
| 838 |
+
return html
|
upload_to_hf.py
DELETED
|
@@ -1,122 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Upload changed parquet files to HuggingFace dataset.
|
| 3 |
-
|
| 4 |
-
This script:
|
| 5 |
-
1. Reads the manifest of changed leaderboards
|
| 6 |
-
2. Uploads ONLY the changed parquet files
|
| 7 |
-
3. Uses HfApi for efficient individual file uploads
|
| 8 |
-
|
| 9 |
-
Usage:
|
| 10 |
-
# With HF_TOKEN environment variable (GitHub Actions):
|
| 11 |
-
python upload_to_hf.py
|
| 12 |
-
|
| 13 |
-
# Interactive login (local):
|
| 14 |
-
python upload_to_hf.py --login
|
| 15 |
-
"""
|
| 16 |
-
|
| 17 |
-
from huggingface_hub import login, HfFolder, HfApi
|
| 18 |
-
import pandas as pd
|
| 19 |
-
from pathlib import Path
|
| 20 |
-
import sys
|
| 21 |
-
import os
|
| 22 |
-
import json
|
| 23 |
-
|
| 24 |
-
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "deepmage121/eee_test")
|
| 25 |
-
PARQUET_DIR = Path("parquet_output")
|
| 26 |
-
MANIFEST_PATH = PARQUET_DIR / "changed_leaderboards.json"
|
| 27 |
-
|
| 28 |
-
def upload_changed_parquets():
|
| 29 |
-
"""
|
| 30 |
-
Upload only changed parquet files from manifest.
|
| 31 |
-
"""
|
| 32 |
-
|
| 33 |
-
hf_token = os.environ.get("HF_TOKEN")
|
| 34 |
-
if hf_token:
|
| 35 |
-
print("Using HF_TOKEN from environment")
|
| 36 |
-
HfFolder.save_token(hf_token)
|
| 37 |
-
elif "--login" in sys.argv:
|
| 38 |
-
print("Logging in to HuggingFace...")
|
| 39 |
-
login()
|
| 40 |
-
else:
|
| 41 |
-
if not HfFolder.get_token():
|
| 42 |
-
print("ERROR: Not logged in. Run with --login flag or set HF_TOKEN environment variable")
|
| 43 |
-
sys.exit(1)
|
| 44 |
-
print("Using existing HuggingFace token")
|
| 45 |
-
|
| 46 |
-
api = HfApi()
|
| 47 |
-
|
| 48 |
-
if not MANIFEST_PATH.exists():
|
| 49 |
-
print(f"ERROR: No manifest found at {MANIFEST_PATH}")
|
| 50 |
-
print("Run convert_changed_to_parquet.py first to generate the manifest")
|
| 51 |
-
sys.exit(1)
|
| 52 |
-
|
| 53 |
-
manifest = json.loads(MANIFEST_PATH.read_text())
|
| 54 |
-
converted_leaderboards = manifest.get("converted", [])
|
| 55 |
-
|
| 56 |
-
if not converted_leaderboards:
|
| 57 |
-
print("\nNo changed leaderboards to upload (per manifest)")
|
| 58 |
-
sys.exit(0)
|
| 59 |
-
|
| 60 |
-
print(f"\nManifest found: {len(converted_leaderboards)} leaderboard(s) to upload")
|
| 61 |
-
|
| 62 |
-
files_to_upload = [
|
| 63 |
-
PARQUET_DIR / f"{lb}.parquet"
|
| 64 |
-
for lb in converted_leaderboards
|
| 65 |
-
]
|
| 66 |
-
|
| 67 |
-
files_to_upload = [f for f in files_to_upload if f.exists()]
|
| 68 |
-
|
| 69 |
-
if not files_to_upload:
|
| 70 |
-
print(f"ERROR: No parquet files to upload in {PARQUET_DIR}")
|
| 71 |
-
sys.exit(1)
|
| 72 |
-
|
| 73 |
-
print(f"\nUploading {len(files_to_upload)} parquet file(s):")
|
| 74 |
-
for pf in files_to_upload:
|
| 75 |
-
print(f" - {pf.stem}")
|
| 76 |
-
|
| 77 |
-
uploaded_count = 0
|
| 78 |
-
error_count = 0
|
| 79 |
-
|
| 80 |
-
for parquet_file in files_to_upload:
|
| 81 |
-
leaderboard_name = parquet_file.stem
|
| 82 |
-
|
| 83 |
-
path_in_repo = f"data/{leaderboard_name}/data-00000-of-00001.parquet"
|
| 84 |
-
|
| 85 |
-
try:
|
| 86 |
-
print(f"\nUploading: {leaderboard_name}")
|
| 87 |
-
|
| 88 |
-
df = pd.read_parquet(parquet_file)
|
| 89 |
-
print(f" {len(df)} rows, {len(df.columns)} columns")
|
| 90 |
-
|
| 91 |
-
api.upload_file(
|
| 92 |
-
path_or_fileobj=str(parquet_file),
|
| 93 |
-
path_in_repo=path_in_repo,
|
| 94 |
-
repo_id=HF_DATASET_REPO,
|
| 95 |
-
repo_type="dataset",
|
| 96 |
-
commit_message=f"Update {leaderboard_name} leaderboard data"
|
| 97 |
-
)
|
| 98 |
-
|
| 99 |
-
print(f" SUCCESS: Uploaded → {path_in_repo}")
|
| 100 |
-
uploaded_count += 1
|
| 101 |
-
|
| 102 |
-
except Exception as e:
|
| 103 |
-
print(f" ERROR: Error uploading {leaderboard_name}: {e}")
|
| 104 |
-
error_count += 1
|
| 105 |
-
|
| 106 |
-
print(f"\n{'='*70}")
|
| 107 |
-
print(f"Upload Summary:")
|
| 108 |
-
print(f"{'='*70}")
|
| 109 |
-
print(f" Successfully uploaded: {uploaded_count} file(s)")
|
| 110 |
-
print(f" Errors: {error_count} file(s)")
|
| 111 |
-
print(f"{'='*70}")
|
| 112 |
-
|
| 113 |
-
if error_count > 0:
|
| 114 |
-
print(f"\nWARNING: {error_count} file(s) failed to upload")
|
| 115 |
-
sys.exit(1)
|
| 116 |
-
|
| 117 |
-
print(f"\nSuccessfully uploaded to HuggingFace!")
|
| 118 |
-
print(f"View at: https://huggingface.co/datasets/{HF_DATASET_REPO}")
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
if __name__ == "__main__":
|
| 122 |
-
upload_changed_parquets()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|