deepmage121 commited on
Commit
b5c23a1
·
1 Parent(s): d0ab546

major changes to space

Browse files
.github/workflows/sync-to-hf.yml DELETED
@@ -1,55 +0,0 @@
1
- name: Sync to HuggingFace Dataset
2
-
3
- on:
4
- push:
5
- branches: [main]
6
- paths:
7
- - 'data/**/*.json'
8
- workflow_dispatch: # Allow manual trigger
9
-
10
- jobs:
11
- sync-to-huggingface:
12
- runs-on: ubuntu-latest
13
-
14
- steps:
15
- - name: Checkout repository
16
- uses: actions/checkout@v4
17
- with:
18
- fetch-depth: 2
19
-
20
- - name: Set up Python
21
- uses: actions/setup-python@v5
22
- with:
23
- python-version: '3.11'
24
-
25
- - name: Install dependencies
26
- run: |
27
- pip install datasets huggingface_hub pandas pyarrow
28
-
29
- - name: Convert Changed JSONs to Parquet (Optimized)
30
- env:
31
- HF_DATASET_REPO: deepmage121/eee_test
32
- HF_TOKEN: ${{ secrets.HF_TOKEN }}
33
- run: |
34
- echo "Detecting changed leaderboards..."
35
- python scripts/convert_to_parquet.py
36
-
37
- - name: Upload Changed Parquets to HuggingFace
38
- env:
39
- HF_DATASET_REPO: deepmage121/eee_test
40
- HF_TOKEN: ${{ secrets.HF_TOKEN }}
41
- run: |
42
- echo "Uploading changed parquets..."
43
- python scripts/upload_to_hf.py
44
-
45
- - name: Report status
46
- if: success()
47
- run: |
48
- echo "Successfully synced to HuggingFace dataset"
49
- echo "View at: https://huggingface.co/datasets/deepmage121/eee_test"
50
- if [ -f parquet_output/changed_leaderboards.json ]; then
51
- echo ""
52
- echo "Changes processed:"
53
- cat parquet_output/changed_leaderboards.json
54
- fi
55
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -6,66 +6,40 @@ import gradio as gr
6
  import pandas as pd
7
  from pathlib import Path
8
 
9
- # Import custom modules
10
  from data_loader import (
11
  load_hf_dataset_on_startup,
12
  get_available_leaderboards,
13
  get_eval_metadata,
14
  build_leaderboard_table,
15
  clear_cache,
 
 
16
  DATA_DIR
17
  )
18
- from ui_components import get_theme, get_custom_css, format_leaderboard_header, format_metric_details
 
 
 
 
 
 
19
 
 
20
 
21
- def export_leaderboard_to_json(selected_leaderboard):
22
- """Export current leaderboard to JSON files in a zip using parquet_to_folder."""
23
- if not selected_leaderboard:
24
- return None
25
-
26
- import tempfile
27
- import shutil
28
- import zipfile
29
- from json_to_parquet import parquet_to_folder
30
-
31
- try:
32
- # Find the parquet file in DATA_DIR
33
- parquet_path = DATA_DIR / selected_leaderboard / f"{selected_leaderboard}.parquet"
34
-
35
- if not parquet_path.exists():
36
- print(f"Parquet file not found: {parquet_path}")
37
- return None
38
-
39
- # Create temp directory for export
40
- with tempfile.TemporaryDirectory() as temp_dir:
41
- temp_path = Path(temp_dir)
42
- output_dir = temp_path / "json_export"
43
- output_dir.mkdir()
44
-
45
- # Use the round-trip functionality from json_to_parquet
46
- parquet_to_folder(str(parquet_path), str(output_dir))
47
-
48
- # Create zip file
49
- zip_path = temp_path / f"{selected_leaderboard}_export.zip"
50
- with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
51
- for json_file in output_dir.rglob("*.json"):
52
- arcname = json_file.relative_to(output_dir)
53
- zipf.write(json_file, arcname)
54
-
55
- # Copy to a permanent location for download
56
- final_zip = Path(tempfile.gettempdir()) / f"{selected_leaderboard}_export.zip"
57
- shutil.copy(zip_path, final_zip)
58
-
59
- return str(final_zip)
60
- except Exception as e:
61
- print(f"Export error: {e}")
62
- return None
63
 
64
-
65
- def update_leaderboard_table(selected_leaderboard, search_query="", progress=gr.Progress()):
66
  """Loads and aggregates data for the selected leaderboard."""
67
  if not selected_leaderboard:
68
- return pd.DataFrame(), "", format_leaderboard_header(None, {}), format_metric_details(None, {})
 
 
 
 
 
 
 
 
 
69
 
70
  metadata = get_eval_metadata(selected_leaderboard)
71
 
@@ -73,110 +47,261 @@ def update_leaderboard_table(selected_leaderboard, search_query="", progress=gr.
73
  progress(value, desc=desc)
74
 
75
  df = build_leaderboard_table(selected_leaderboard, "", progress_callback)
76
- total_count = len(df)
77
 
78
- # Apply search filter (searches all columns)
79
  if search_query and not df.empty:
80
  mask = df.astype(str).apply(lambda row: row.str.contains(search_query, case=False, na=False).any(), axis=1)
81
  df = df[mask]
82
 
83
- # Build search status message
84
- if search_query:
85
- search_msg = f"Showing {len(df)} of {total_count} results for '{search_query}'"
86
- else:
87
- search_msg = f"Showing {len(df)} results"
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- return df, search_msg, format_leaderboard_header(selected_leaderboard, metadata), format_metric_details(selected_leaderboard, metadata)
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
 
92
- # Load HF dataset BEFORE building the interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  load_hf_dataset_on_startup()
94
 
95
- # Build Gradio interface
96
- with gr.Blocks(title="Eval Leaderboard", theme=get_theme(), css=get_custom_css()) as demo:
97
 
98
- with gr.Row(variant="compact", elem_classes="header-row"):
99
- with gr.Column(scale=1):
100
- gr.Markdown("# 🏆 Evaluation Leaderboard")
101
- gr.Markdown("Analyze and compare model performance metrics.", elem_classes="subtitle")
 
 
 
 
 
 
 
 
 
102
 
103
- with gr.Row(variant="panel"):
104
- initial_choices = get_available_leaderboards()
105
- initial_value = initial_choices[0] if initial_choices else None
106
-
107
- with gr.Column(scale=3):
108
- leaderboard_selector = gr.Dropdown(
109
- choices=initial_choices,
110
- value=initial_value,
111
- label="Current Leaderboard",
112
- interactive=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  )
114
- with gr.Column(scale=3):
115
- search_box = gr.Textbox(
116
- label="Search",
117
- placeholder="Type to search across all columns...",
118
- show_label=False
119
- )
120
- with gr.Column(scale=1):
121
- refresh_btn = gr.Button("🔄 Refresh", variant="secondary", size="sm")
122
-
123
- with gr.Accordion("ℹ️ How to Submit Data", open=False):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  gr.Markdown("""
125
- ### Submitting Evaluation Data
126
-
127
- **Data submissions happen via GitHub Pull Requests:**
128
-
129
- 1. **Fork** [evaleval/every_eval_ever](https://github.com/evaleval/every_eval_ever)
130
- 2. **Add your JSON files** to `data/<leaderboard>/<developer>/<model>/`
131
- 3. **Create a Pull Request**
132
- 4. **Automated validation** checks your data
133
- 5. **After merge**: GitHub Actions automatically syncs to HuggingFace
134
- 6. **Refresh this page** to see your data!
135
 
136
- #### File Structure
137
- ```
138
- data/
139
- └── YourBenchmark/
140
- └── developer_name/
141
- └── model_name/
142
- └── {uuid}.json
143
- ```
144
 
145
- Each JSON file should follow the schema and be named with a unique UUID.
146
-
147
- 📖 [**Full Submission Guide**](https://github.com/evaleval/every_eval_ever#contributor-guide) |
148
- 📋 [**JSON Schema**](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json) |
149
- 👀 [**See Examples**](https://github.com/evaleval/every_eval_ever/tree/main/data)
150
  """)
151
-
152
- init_df, init_search_msg, init_header, init_metrics = update_leaderboard_table(initial_value)
153
 
154
- header_view = gr.HTML(value=init_header)
 
 
155
 
156
- search_info = gr.Markdown(value=init_search_msg)
 
157
 
158
- leaderboard_table = gr.Dataframe(
159
- value=init_df,
160
- label=None,
161
- interactive=False,
162
- wrap=True,
163
- elem_classes="dataframe"
164
- )
165
-
166
- metrics_view = gr.HTML(value=init_metrics)
167
 
 
 
168
 
169
- # Event handlers
170
  leaderboard_selector.change(
 
 
 
 
171
  fn=update_leaderboard_table,
172
- inputs=[leaderboard_selector, search_box],
173
- outputs=[leaderboard_table, search_info, header_view, metrics_view]
174
  )
175
 
176
  search_box.input(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  fn=update_leaderboard_table,
178
- inputs=[leaderboard_selector, search_box],
179
- outputs=[leaderboard_table, search_info, header_view, metrics_view]
180
  )
181
 
182
  refresh_btn.click(
@@ -184,10 +309,40 @@ Each JSON file should follow the schema and be named with a unique UUID.
184
  outputs=[leaderboard_selector]
185
  ).then(
186
  fn=lambda: clear_cache()
 
 
 
 
187
  ).then(
188
  fn=update_leaderboard_table,
189
- inputs=[leaderboard_selector, search_box],
190
- outputs=[leaderboard_table, search_info, header_view, metrics_view]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  )
192
 
193
  DATA_DIR.mkdir(exist_ok=True)
 
6
  import pandas as pd
7
  from pathlib import Path
8
 
 
9
  from data_loader import (
10
  load_hf_dataset_on_startup,
11
  get_available_leaderboards,
12
  get_eval_metadata,
13
  build_leaderboard_table,
14
  clear_cache,
15
+ search_model_across_leaderboards,
16
+ get_all_model_names,
17
  DATA_DIR
18
  )
19
+ from ui_components import (
20
+ get_theme,
21
+ get_custom_css,
22
+ format_leaderboard_header,
23
+ format_metric_details,
24
+ format_model_card,
25
+ )
26
 
27
+ PAGE_SIZE = 50
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
+ def update_leaderboard_table(selected_leaderboard, search_query="", current_page=1, sort_column=None, progress=gr.Progress()):
 
31
  """Loads and aggregates data for the selected leaderboard."""
32
  if not selected_leaderboard:
33
+ return (
34
+ pd.DataFrame(),
35
+ format_leaderboard_header(None, {}),
36
+ format_metric_details(None, {}),
37
+ gr.update(choices=[], value=None),
38
+ gr.update(interactive=False),
39
+ gr.update(interactive=False),
40
+ gr.update(choices=[], value=None),
41
+ "0 / 0",
42
+ )
43
 
44
  metadata = get_eval_metadata(selected_leaderboard)
45
 
 
47
  progress(value, desc=desc)
48
 
49
  df = build_leaderboard_table(selected_leaderboard, "", progress_callback)
 
50
 
 
51
  if search_query and not df.empty:
52
  mask = df.astype(str).apply(lambda row: row.str.contains(search_query, case=False, na=False).any(), axis=1)
53
  df = df[mask]
54
 
55
+ filtered_count = len(df)
56
+
57
+ if sort_column and sort_column in df.columns and not df.empty:
58
+ df = df.sort_values(by=sort_column, ascending=False, na_position='last')
59
+
60
+ total_pages = max(1, (filtered_count + PAGE_SIZE - 1) // PAGE_SIZE) if filtered_count > 0 else 1
61
+ current_page = max(1, min(current_page, total_pages))
62
+
63
+ start_idx = (current_page - 1) * PAGE_SIZE
64
+ end_idx = start_idx + PAGE_SIZE
65
+ df_paginated = df.iloc[start_idx:end_idx] if not df.empty else df
66
+
67
+ page_choices = [str(i) for i in range(1, total_pages + 1)]
68
+ page_dropdown = gr.update(choices=page_choices, value=str(current_page))
69
+ prev_btn = gr.update(interactive=(current_page > 1))
70
+ next_btn = gr.update(interactive=(current_page < total_pages))
71
+ page_info = f"{current_page} / {total_pages}"
72
 
73
+ sort_choices = list(df.columns) if not df.empty else []
74
+ default_sort = sort_column if sort_column and sort_column in sort_choices else ("Average" if "Average" in sort_choices else (sort_choices[0] if sort_choices else None))
75
+ sort_column_update = gr.update(choices=sort_choices, value=default_sort)
76
+
77
+ return (
78
+ df_paginated,
79
+ format_leaderboard_header(selected_leaderboard, metadata),
80
+ format_metric_details(selected_leaderboard, metadata),
81
+ page_dropdown,
82
+ prev_btn,
83
+ next_btn,
84
+ sort_column_update,
85
+ page_info,
86
+ )
87
 
88
 
89
+ def search_model(model_query):
90
+ """Search for a model and return formatted card."""
91
+ if not model_query or len(model_query) < 2:
92
+ return """
93
+ <div class="no-results">
94
+ <h3>Search for a model</h3>
95
+ <p>Enter a model name to see its benchmarks across all leaderboards</p>
96
+ </div>
97
+ """
98
+
99
+ results, _ = search_model_across_leaderboards(model_query)
100
+
101
+ if not results:
102
+ return f"""
103
+ <div class="no-results">
104
+ <h3>No results for "{model_query}"</h3>
105
+ <p>Try a different model name or check the spelling</p>
106
+ </div>
107
+ """
108
+
109
+ # Use the first matching model
110
+ model_name = list(results.keys())[0]
111
+ model_data = results[model_name]
112
+
113
+ return format_model_card(model_name, model_data)
114
+
115
+
116
+ def get_model_suggestions(query):
117
+ """Get model name suggestions for autocomplete."""
118
+ if not query or len(query) < 2:
119
+ return gr.update(choices=[])
120
+
121
+ _, matches = search_model_across_leaderboards(query)
122
+ return gr.update(choices=matches[:15])
123
+
124
+
125
+ # Load data at startup
126
  load_hf_dataset_on_startup()
127
 
128
+ # Build interface
129
+ with gr.Blocks(title="Every Eval Ever", theme=get_theme(), css=get_custom_css()) as demo:
130
 
131
+ # Header
132
+ gr.HTML("""
133
+ <div class="app-header">
134
+ <div class="logo-mark">E³</div>
135
+ <div class="brand">
136
+ <h1>Every Eval Ever</h1>
137
+ <span class="tagline">Browse and compare model benchmarks</span>
138
+ </div>
139
+ <div class="header-right">
140
+ <span class="version-badge">beta</span>
141
+ </div>
142
+ </div>
143
+ """)
144
 
145
+ with gr.Tabs():
146
+ # === TAB 1: Leaderboard View ===
147
+ with gr.TabItem("📊 Leaderboards"):
148
+ with gr.Row(elem_classes="controls-bar"):
149
+ initial_choices = get_available_leaderboards()
150
+ initial_value = initial_choices[0] if initial_choices else None
151
+
152
+ with gr.Column(scale=2, min_width=200):
153
+ leaderboard_selector = gr.Dropdown(
154
+ choices=initial_choices,
155
+ value=initial_value,
156
+ label="Leaderboard",
157
+ interactive=True
158
+ )
159
+ with gr.Column(scale=3, min_width=250):
160
+ search_box = gr.Textbox(
161
+ label="Filter",
162
+ placeholder="Filter models...",
163
+ show_label=True
164
+ )
165
+ with gr.Column(scale=1, min_width=100):
166
+ refresh_btn = gr.Button("↻ Refresh", variant="secondary", size="sm")
167
+
168
+ init_df, init_header, init_metrics, init_page_dropdown, init_prev, init_next, init_sort_cols, init_page_info = update_leaderboard_table(initial_value, "", 1, "Average")
169
+
170
+ header_view = gr.HTML(value=init_header)
171
+
172
+ # Hidden sort state (default to Average)
173
+ sort_column_dropdown = gr.Dropdown(
174
+ choices=init_sort_cols.get("choices", []) if hasattr(init_sort_cols, 'get') else [],
175
+ value=init_sort_cols.get("value") if hasattr(init_sort_cols, 'get') else None,
176
+ visible=False,
177
  )
178
+
179
+ leaderboard_table = gr.Dataframe(
180
+ value=init_df,
181
+ label=None,
182
+ interactive=False,
183
+ wrap=False,
184
+ elem_classes="dataframe",
185
+ column_widths=["28%", "12%", "7%", "7%"]
186
+ )
187
+
188
+ # Pagination below table - centered
189
+ with gr.Row(elem_classes="pagination-bar"):
190
+ prev_btn = gr.Button("←", variant="secondary", size="sm", min_width=60)
191
+ page_info = gr.Markdown(value=init_page_info, elem_classes="page-info")
192
+ next_btn = gr.Button("→", variant="secondary", size="sm", min_width=60)
193
+ page_dropdown = gr.Dropdown(
194
+ choices=[],
195
+ value="1",
196
+ visible=False,
197
+ )
198
+
199
+ metrics_view = gr.HTML(value=init_metrics)
200
+
201
+ # === TAB 2: Model View ===
202
+ with gr.TabItem("🔍 Model Lookup"):
203
+ gr.Markdown("### Find a model's benchmarks across all leaderboards")
204
+
205
+ with gr.Row(elem_classes="controls-bar"):
206
+ with gr.Column(scale=4):
207
+ model_search_dropdown = gr.Dropdown(
208
+ choices=[],
209
+ label="Model Name",
210
+ allow_custom_value=True,
211
+ interactive=True,
212
+ filterable=True,
213
+ )
214
+ with gr.Column(scale=1, min_width=100):
215
+ model_search_btn = gr.Button("Search", variant="primary", size="sm")
216
+
217
+ model_card_view = gr.HTML(value="""
218
+ <div class="no-results">
219
+ <h3>Search for a model</h3>
220
+ <p>Start typing to see suggestions, then select a model</p>
221
+ </div>
222
+ """)
223
+
224
+ # Submission guide
225
+ with gr.Accordion("📤 How to Submit Data", open=False):
226
  gr.Markdown("""
227
+ **Submit via GitHub Pull Request:**
 
 
 
 
 
 
 
 
 
228
 
229
+ 1. Fork [evaleval/every_eval_ever](https://github.com/evaleval/every_eval_ever)
230
+ 2. Add JSON files to `data/<leaderboard>/<developer>/<model>/`
231
+ 3. Open a PR — automated validation runs on submission
232
+ 4. After merge, data syncs to HuggingFace automatically
 
 
 
 
233
 
234
+ [Submission Guide](https://github.com/evaleval/every_eval_ever#contributor-guide) · [JSON Schema](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json)
 
 
 
 
235
  """)
 
 
236
 
237
+ # === State ===
238
+ current_page_state = gr.State(value=1)
239
+ sort_column_state = gr.State(value="Average")
240
 
241
+ def go_prev(current):
242
+ return max(1, current - 1)
243
 
244
+ def go_next(current):
245
+ return current + 1
 
 
 
 
 
 
 
246
 
247
+ def reset_page():
248
+ return 1
249
 
250
+ # === Leaderboard Events ===
251
  leaderboard_selector.change(
252
+ fn=reset_page, outputs=[current_page_state]
253
+ ).then(
254
+ fn=lambda: "Average", outputs=[sort_column_state]
255
+ ).then(
256
  fn=update_leaderboard_table,
257
+ inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state],
258
+ outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
259
  )
260
 
261
  search_box.input(
262
+ fn=reset_page, outputs=[current_page_state]
263
+ ).then(
264
+ fn=update_leaderboard_table,
265
+ inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state],
266
+ outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
267
+ )
268
+
269
+ sort_column_dropdown.change(
270
+ fn=lambda col: col,
271
+ inputs=[sort_column_dropdown],
272
+ outputs=[sort_column_state]
273
+ ).then(
274
+ fn=reset_page, outputs=[current_page_state]
275
+ ).then(
276
+ fn=update_leaderboard_table,
277
+ inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state],
278
+ outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
279
+ )
280
+
281
+ page_dropdown.change(
282
+ fn=lambda p: int(p) if p else 1,
283
+ inputs=[page_dropdown],
284
+ outputs=[current_page_state]
285
+ ).then(
286
+ fn=update_leaderboard_table,
287
+ inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state],
288
+ outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
289
+ )
290
+
291
+ prev_btn.click(
292
+ fn=go_prev, inputs=[current_page_state], outputs=[current_page_state]
293
+ ).then(
294
+ fn=update_leaderboard_table,
295
+ inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state],
296
+ outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
297
+ )
298
+
299
+ next_btn.click(
300
+ fn=go_next, inputs=[current_page_state], outputs=[current_page_state]
301
+ ).then(
302
  fn=update_leaderboard_table,
303
+ inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state],
304
+ outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
305
  )
306
 
307
  refresh_btn.click(
 
309
  outputs=[leaderboard_selector]
310
  ).then(
311
  fn=lambda: clear_cache()
312
+ ).then(
313
+ fn=reset_page, outputs=[current_page_state]
314
+ ).then(
315
+ fn=lambda: "Average", outputs=[sort_column_state]
316
  ).then(
317
  fn=update_leaderboard_table,
318
+ inputs=[leaderboard_selector, search_box, current_page_state, sort_column_state],
319
+ outputs=[leaderboard_table, header_view, metrics_view, page_dropdown, prev_btn, next_btn, sort_column_dropdown, page_info]
320
+ )
321
+
322
+ # === Model Search Events ===
323
+ def update_model_suggestions(query):
324
+ """Update dropdown choices based on query."""
325
+ if not query or len(query) < 2:
326
+ return gr.update(choices=[])
327
+ _, matches = search_model_across_leaderboards(query)
328
+ return gr.update(choices=matches[:20])
329
+
330
+ model_search_dropdown.input(
331
+ fn=update_model_suggestions,
332
+ inputs=[model_search_dropdown],
333
+ outputs=[model_search_dropdown]
334
+ )
335
+
336
+ model_search_btn.click(
337
+ fn=search_model,
338
+ inputs=[model_search_dropdown],
339
+ outputs=[model_card_view]
340
+ )
341
+
342
+ model_search_dropdown.select(
343
+ fn=search_model,
344
+ inputs=[model_search_dropdown],
345
+ outputs=[model_card_view]
346
  )
347
 
348
  DATA_DIR.mkdir(exist_ok=True)
data_loader.py CHANGED
@@ -296,11 +296,16 @@ def build_leaderboard_table(selected_leaderboard, search_query="", progress_call
296
  if len(eval_only_cols) > 0:
297
  df["Average"] = df[eval_only_cols].mean(axis=1).round(3)
298
 
299
- base_cols = ["Model", "Developer", "Params (B)", "Arch", "Precision", "Average"]
300
- eval_cols = [c for c in df.columns if c not in base_cols]
 
 
 
 
301
  base_cols = [c for c in base_cols if c in df.columns]
 
302
 
303
- final_cols = base_cols + sorted(eval_cols)
304
  df = df[final_cols]
305
 
306
  if "Average" in df.columns:
@@ -315,3 +320,67 @@ def clear_cache():
315
  """Clears all caches."""
316
  LEADERBOARD_CACHE.clear()
317
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  if len(eval_only_cols) > 0:
297
  df["Average"] = df[eval_only_cols].mean(axis=1).round(3)
298
 
299
+ # Base columns: Model, Developer, Params, Average
300
+ # Eval columns: all evaluation scores
301
+ # Model detail columns: Arch, Precision (moved to end)
302
+ base_cols = ["Model", "Developer", "Params (B)", "Average"]
303
+ model_detail_cols = ["Arch", "Precision"]
304
+ eval_cols = [c for c in df.columns if c not in base_cols and c not in model_detail_cols]
305
  base_cols = [c for c in base_cols if c in df.columns]
306
+ model_detail_cols = [c for c in model_detail_cols if c in df.columns]
307
 
308
+ final_cols = base_cols + sorted(eval_cols) + model_detail_cols
309
  df = df[final_cols]
310
 
311
  if "Average" in df.columns:
 
320
  """Clears all caches."""
321
  LEADERBOARD_CACHE.clear()
322
 
323
+
324
+ def search_model_across_leaderboards(model_query):
325
+ """Search for a model across all leaderboards and return aggregated results."""
326
+ if not model_query or not HF_DATASET_CACHE:
327
+ return {}, []
328
+
329
+ model_query_lower = model_query.lower().strip()
330
+ results = {}
331
+ all_matches = []
332
+
333
+ for leaderboard_name, parsed_items in HF_DATASET_CACHE.items():
334
+ for item in parsed_items:
335
+ model_id = item.get("model", "")
336
+ # Check if query matches model name (case insensitive, partial match)
337
+ if model_query_lower in model_id.lower():
338
+ all_matches.append(model_id)
339
+
340
+ # Exact match gets priority
341
+ if model_id.lower() == model_query_lower or model_id == model_query:
342
+ if model_id not in results:
343
+ results[model_id] = {}
344
+ results[model_id][leaderboard_name] = {
345
+ "developer": item.get("developer"),
346
+ "params": item.get("params"),
347
+ "architecture": item.get("architecture"),
348
+ "precision": item.get("precision"),
349
+ "results": item.get("results", {})
350
+ }
351
+
352
+ # If no exact match, use partial matches
353
+ if not results and all_matches:
354
+ # Get the first partial match
355
+ for leaderboard_name, parsed_items in HF_DATASET_CACHE.items():
356
+ for item in parsed_items:
357
+ model_id = item.get("model", "")
358
+ if model_query_lower in model_id.lower():
359
+ if model_id not in results:
360
+ results[model_id] = {}
361
+ results[model_id][leaderboard_name] = {
362
+ "developer": item.get("developer"),
363
+ "params": item.get("params"),
364
+ "architecture": item.get("architecture"),
365
+ "precision": item.get("precision"),
366
+ "results": item.get("results", {})
367
+ }
368
+
369
+ # Return unique matches for autocomplete
370
+ unique_matches = sorted(set(all_matches))[:20] # Limit to 20 suggestions
371
+
372
+ return results, unique_matches
373
+
374
+
375
+ def get_all_model_names():
376
+ """Get all unique model names across all leaderboards."""
377
+ if not HF_DATASET_CACHE:
378
+ return []
379
+
380
+ models = set()
381
+ for parsed_items in HF_DATASET_CACHE.values():
382
+ for item in parsed_items:
383
+ models.add(item.get("model", ""))
384
+
385
+ return sorted(models)
386
+
json_to_parquet.py DELETED
@@ -1,228 +0,0 @@
1
-
2
-
3
- import json
4
- from pathlib import Path
5
- import pandas as pd
6
-
7
-
8
- def json_to_row(json_path: Path) -> dict:
9
- """Convert one JSON to a single row (1 JSON = 1 row, evaluations as columns)."""
10
- with open(json_path, 'r') as f:
11
- data = json.load(f)
12
-
13
- required_fields = ["schema_version", "evaluation_id", "evaluation_source", "retrieved_timestamp",
14
- "source_data", "source_metadata", "model_info", "evaluation_results"]
15
- for field in required_fields:
16
- if field not in data:
17
- raise ValueError(f"{json_path}: Missing required field '{field}'")
18
-
19
- if "evaluation_source_name" not in data["evaluation_source"]:
20
- raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_name'")
21
- if "evaluation_source_type" not in data["evaluation_source"]:
22
- raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_type'")
23
-
24
- if "source_organization_name" not in data["source_metadata"]:
25
- raise ValueError(f"{json_path}: Missing required field 'source_metadata.source_organization_name'")
26
- if "evaluator_relationship" not in data["source_metadata"]:
27
- raise ValueError(f"{json_path}: Missing required field 'source_metadata.evaluator_relationship'")
28
-
29
- if "name" not in data["model_info"]:
30
- raise ValueError(f"{json_path}: Missing required field 'model_info.name'")
31
- if "id" not in data["model_info"]:
32
- raise ValueError(f"{json_path}: Missing required field 'model_info.id'")
33
- if "developer" not in data["model_info"]:
34
- raise ValueError(f"{json_path}: Missing required field 'model_info.developer'")
35
-
36
- leaderboard = data["evaluation_source"]["evaluation_source_name"]
37
- model = data["model_info"]["id"]
38
- uuid = json_path.stem
39
- developer = data["model_info"]["developer"]
40
-
41
- # Validate evaluation results
42
- for eval_result in data["evaluation_results"]:
43
- if "evaluation_name" not in eval_result:
44
- raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].evaluation_name'")
45
- if "metric_config" not in eval_result:
46
- raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config'")
47
- if "score_details" not in eval_result:
48
- raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details'")
49
-
50
- if "lower_is_better" not in eval_result["metric_config"]:
51
- raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config.lower_is_better'")
52
- if "score" not in eval_result["score_details"]:
53
- raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details.score'")
54
-
55
- row = {
56
- # Folder structure (for reconstruction)
57
- "_leaderboard": leaderboard,
58
- "_developer": developer,
59
- "_model": model,
60
- "_uuid": uuid,
61
-
62
- # Required top-level fields
63
- "schema_version": data["schema_version"],
64
- "evaluation_id": data["evaluation_id"],
65
- "retrieved_timestamp": data["retrieved_timestamp"],
66
- "source_data": json.dumps(data["source_data"]),
67
-
68
- # Required nested fields
69
- "evaluation_source_name": data["evaluation_source"]["evaluation_source_name"],
70
- "evaluation_source_type": data["evaluation_source"]["evaluation_source_type"],
71
-
72
- "source_organization_name": data["source_metadata"]["source_organization_name"],
73
- "source_organization_url": data["source_metadata"].get("source_organization_url"),
74
- "source_organization_logo_url": data["source_metadata"].get("source_organization_logo_url"),
75
- "evaluator_relationship": data["source_metadata"]["evaluator_relationship"],
76
-
77
- "model_name": data["model_info"]["name"],
78
- "model_id": data["model_info"]["id"],
79
- "model_developer": data["model_info"]["developer"],
80
- "model_inference_platform": data["model_info"].get("inference_platform"),
81
-
82
- # Store full evaluation_results and additional_details as JSON
83
- "evaluation_results": json.dumps(data["evaluation_results"]),
84
- "additional_details": json.dumps(data["additional_details"]) if "additional_details" in data else None,
85
- }
86
-
87
- return row
88
-
89
-
90
- def add_to_parquet(json_or_folder: str, parquet_file: str):
91
- """
92
- Add JSON(s) to Parquet file.
93
- Creates new file if it doesn't exist, appends and deduplicates if it does.
94
-
95
- Args:
96
- json_or_folder: Path to single JSON file or folder containing JSONs
97
- parquet_file: Output Parquet file path
98
- """
99
- input_path = Path(json_or_folder)
100
-
101
- if input_path.is_file():
102
- json_files = [input_path]
103
- elif input_path.is_dir():
104
- json_files = list(input_path.rglob("*.json"))
105
- if not json_files:
106
- raise ValueError(f"No JSON files found in directory: {json_or_folder}")
107
- else:
108
- raise ValueError(f"Invalid input: {json_or_folder}")
109
-
110
- print(f"Processing {len(json_files)} JSON file(s)...")
111
-
112
- parquet_path = Path(parquet_file)
113
- if parquet_path.exists():
114
- existing_df = pd.read_parquet(parquet_file)
115
- existing_keys = set(
116
- existing_df[["_leaderboard", "_developer", "_model", "_uuid"]]
117
- .apply(tuple, axis=1)
118
- )
119
- print(f"Found {len(existing_df)} existing rows")
120
- else:
121
- existing_df = None
122
- existing_keys = set()
123
-
124
- all_rows = []
125
- skipped = 0
126
- for i, jf in enumerate(json_files, 1):
127
- if i % 100 == 0:
128
- print(f" {i}/{len(json_files)}")
129
-
130
- row = json_to_row(jf)
131
- key = (row["_leaderboard"], row["_developer"], row["_model"], row["_uuid"])
132
- if key not in existing_keys:
133
- all_rows.append(row)
134
- existing_keys.add(key)
135
- else:
136
- skipped += 1
137
-
138
- if skipped > 0:
139
- print(f" Skipped {skipped} duplicate file(s)")
140
-
141
- # Handle case where no new rows to add
142
- if not all_rows:
143
- if existing_df is not None:
144
- print(f"No new files to add, keeping existing {len(existing_df)} file(s)")
145
- return
146
- else:
147
- raise ValueError("No valid JSON files to process and no existing parquet file")
148
-
149
- new_df = pd.DataFrame(all_rows)
150
-
151
- if existing_df is not None:
152
- df = pd.concat([existing_df, new_df], ignore_index=True)
153
- print(f"Added {len(new_df)} new file(s) to existing {len(existing_df)} file(s)")
154
- else:
155
- df = new_df
156
-
157
- df.to_parquet(parquet_file, index=False)
158
- print(f"Saved {len(df)} total file(s) to {parquet_file} ({parquet_path.stat().st_size / 1024 / 1024:.1f} MB)")
159
-
160
-
161
- def parquet_to_folder(parquet_file: str, output_dir: str):
162
- """Reconstruct folder structure from Parquet."""
163
- df = pd.read_parquet(parquet_file)
164
- out = Path(output_dir)
165
-
166
- for _, row in df.iterrows():
167
- lb = row["_leaderboard"]
168
- dev = row["_developer"]
169
- model = row["_model"]
170
- uuid = row["_uuid"]
171
-
172
- json_data = {
173
- "schema_version": row["schema_version"],
174
- "evaluation_id": row["evaluation_id"],
175
- "retrieved_timestamp": row["retrieved_timestamp"],
176
- "source_data": json.loads(row["source_data"]),
177
- "evaluation_source": {
178
- "evaluation_source_name": row["evaluation_source_name"],
179
- "evaluation_source_type": row["evaluation_source_type"]
180
- },
181
- "source_metadata": {
182
- "source_organization_name": row["source_organization_name"],
183
- "evaluator_relationship": row["evaluator_relationship"]
184
- },
185
- "model_info": {
186
- "name": row["model_name"],
187
- "id": row["model_id"],
188
- "developer": row["model_developer"]
189
- },
190
- "evaluation_results": json.loads(row["evaluation_results"])
191
- }
192
-
193
- if pd.notna(row["source_organization_url"]):
194
- json_data["source_metadata"]["source_organization_url"] = row["source_organization_url"]
195
- if pd.notna(row["source_organization_logo_url"]):
196
- json_data["source_metadata"]["source_organization_logo_url"] = row["source_organization_logo_url"]
197
-
198
- if pd.notna(row["model_inference_platform"]):
199
- json_data["model_info"]["inference_platform"] = row["model_inference_platform"]
200
-
201
- if pd.notna(row["additional_details"]):
202
- json_data["additional_details"] = json.loads(row["additional_details"])
203
-
204
- file_path = out / lb / dev / model / f"{uuid}.json"
205
- file_path.parent.mkdir(parents=True, exist_ok=True)
206
- with open(file_path, 'w') as f:
207
- json.dump(json_data, f, indent=2)
208
-
209
- print(f"Reconstructed {len(df)} files to {output_dir}")
210
-
211
-
212
- if __name__ == "__main__":
213
- import sys
214
-
215
- if len(sys.argv) < 2:
216
- print("Usage:")
217
- print(" python json_to_parquet.py add <json_or_folder> <output.parquet>")
218
- print(" python json_to_parquet.py export <input.parquet> <output_dir>")
219
- sys.exit(1)
220
-
221
- cmd = sys.argv[1]
222
-
223
- if cmd == "add":
224
- add_to_parquet(sys.argv[2], sys.argv[3])
225
- elif cmd == "export":
226
- parquet_to_folder(sys.argv[2], sys.argv[3])
227
- else:
228
- print(f"Unknown command: {cmd}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/convert_to_parquet.py DELETED
@@ -1,142 +0,0 @@
1
- """
2
- Incremental parquet conversion with HuggingFace sync.
3
-
4
- Optimized workflow:
5
- 1. Detect changed leaderboards via git diff (instant!)
6
- 2. Download ONLY changed parquets from HF (fast!)
7
- 3. Re-convert ONLY changed leaderboards
8
- 4. Ready for upload (handled by upload_to_hf.py)
9
-
10
- This avoids downloading and processing unchanged leaderboards.
11
- """
12
-
13
- from pathlib import Path
14
- import sys
15
- import subprocess
16
- import os
17
- import json
18
- from datasets import load_dataset
19
-
20
- sys.path.insert(0, str(Path(__file__).parent.resolve().parent))
21
-
22
- from json_to_parquet import add_to_parquet
23
-
24
- HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "deepmage121/eee_test")
25
-
26
- def download_leaderboards(output_dir: Path, leaderboard_names: set[str]) -> set[str]:
27
- """Download existing leaderboard parquets from HuggingFace."""
28
- try:
29
- dataset_dict = load_dataset(HF_DATASET_REPO)
30
- downloaded: set[str] = set()
31
-
32
- for lb in leaderboard_names:
33
- if lb in dataset_dict:
34
- print(f" Downloading {lb}")
35
- dataset_dict[lb].to_pandas().to_parquet(output_dir / f"{lb}.parquet", index=False)
36
- downloaded.add(lb)
37
- else:
38
- print(f" {lb} (new)")
39
-
40
- print(f"Downloaded {len(downloaded)}/{len(leaderboard_names)} parquet(s)")
41
- return downloaded
42
-
43
- except Exception as e:
44
- print(f"HF download failed: {e}")
45
- sys.exit(1)
46
-
47
-
48
- def detect_modified_leaderboards() -> set[str]:
49
- """Get leaderboards with changed JSONs via git diff (HEAD~1)."""
50
- try:
51
- result = subprocess.run(
52
- ["git", "diff", "--name-only", "HEAD~1", "HEAD", "data/"],
53
- capture_output=True, text=True, check=True
54
- )
55
-
56
- changed_files = result.stdout.strip().split('\n')
57
- if not changed_files or changed_files == ['']:
58
- print("No changes detected in data/")
59
- return set()
60
-
61
- leaderboards = {
62
- Path(f).parts[1]
63
- for f in changed_files
64
- if f.startswith('data/') and f.endswith('.json') and len(Path(f).parts) >= 2
65
- }
66
- return leaderboards
67
-
68
- except subprocess.CalledProcessError as e:
69
- print(f"ERROR: Git command failed: {e}")
70
- sys.exit(1)
71
-
72
-
73
- def convert_changed_leaderboards():
74
- """
75
- Optimized conversion: detect changes, download only changed, re-convert only changed.
76
- """
77
-
78
- data_dir = Path("data")
79
- output_dir = Path("parquet_output")
80
- output_dir.mkdir(exist_ok=True)
81
-
82
- if not data_dir.exists():
83
- print(f"Data directory not found: {data_dir}")
84
- sys.exit(1)
85
-
86
- changed_leaderboards: set[str] = detect_modified_leaderboards()
87
-
88
- if len(changed_leaderboards) == 0:
89
- print("No changes detected, nothing to upload")
90
- manifest = {"changed": [], "converted": []}
91
- (output_dir / "changed_leaderboards.json").write_text(json.dumps(manifest, indent=2))
92
- sys.exit(0)
93
-
94
- print(f"Detected {len(changed_leaderboards)} changed leaderboard(s):")
95
- for lb in changed_leaderboards:
96
- print(f" {lb}")
97
-
98
- downloaded = download_leaderboards(output_dir, changed_leaderboards)
99
-
100
- converted_count = 0
101
- error_count = 0
102
- converted_leaderboards = []
103
-
104
- for leaderboard_name in changed_leaderboards:
105
- leaderboard_dir = os.path.join(data_dir, leaderboard_name)
106
-
107
- parquet_path = os.path.join(output_dir, f"{leaderboard_name}.parquet")
108
-
109
- print(f"\nConverting: {leaderboard_name}")
110
-
111
- try:
112
- add_to_parquet(json_or_folder=str(leaderboard_dir), parquet_file=str(parquet_path))
113
-
114
- print(f" Converted to {parquet_path}")
115
- converted_count += 1
116
- converted_leaderboards.append(leaderboard_name)
117
-
118
- except Exception as e:
119
- print(f" Error: {e}")
120
- error_count += 1
121
-
122
- manifest = {
123
- "changed": list(changed_leaderboards),
124
- "converted": converted_leaderboards,
125
- "downloaded": list(downloaded),
126
- "errors": error_count
127
- }
128
- manifest_path = os.path.join(output_dir, "changed_leaderboards.json")
129
- with open(manifest_path, 'w') as f:
130
- json.dump(manifest, f, indent=2)
131
-
132
- if error_count > 0:
133
- sys.exit(1)
134
-
135
- if converted_count == 0:
136
- print("Warning: No parquet files successfully converted!")
137
- sys.exit(1)
138
-
139
-
140
- if __name__ == "__main__":
141
- convert_changed_leaderboards()
142
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/json_to_parquet.py DELETED
@@ -1,222 +0,0 @@
1
- """
2
- Convert evaluation JSONs to Parquet for HF Datasets.
3
- Input: single JSON or folder of JSONs (any structure)
4
- Output: Parquet with all data + reconstructable folder structure
5
- """
6
-
7
- import json
8
- from pathlib import Path
9
- import pandas as pd
10
-
11
-
12
- def json_to_row(json_path: Path) -> dict:
13
- """Convert one JSON to a single row (1 JSON = 1 row, evaluations as columns)."""
14
- with open(json_path, 'r') as f:
15
- data = json.load(f)
16
-
17
- required_fields = ["schema_version", "evaluation_id", "evaluation_source", "retrieved_timestamp",
18
- "source_data", "source_metadata", "model_info", "evaluation_results"]
19
- for field in required_fields:
20
- if field not in data:
21
- raise ValueError(f"{json_path}: Missing required field '{field}'")
22
-
23
- if "evaluation_source_name" not in data["evaluation_source"]:
24
- raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_name'")
25
- if "evaluation_source_type" not in data["evaluation_source"]:
26
- raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_type'")
27
-
28
- if "source_organization_name" not in data["source_metadata"]:
29
- raise ValueError(f"{json_path}: Missing required field 'source_metadata.source_organization_name'")
30
- if "evaluator_relationship" not in data["source_metadata"]:
31
- raise ValueError(f"{json_path}: Missing required field 'source_metadata.evaluator_relationship'")
32
-
33
- if "name" not in data["model_info"]:
34
- raise ValueError(f"{json_path}: Missing required field 'model_info.name'")
35
- if "id" not in data["model_info"]:
36
- raise ValueError(f"{json_path}: Missing required field 'model_info.id'")
37
- if "developer" not in data["model_info"]:
38
- raise ValueError(f"{json_path}: Missing required field 'model_info.developer'")
39
-
40
- leaderboard = data["evaluation_source"]["evaluation_source_name"]
41
- model = data["model_info"]["id"]
42
- uuid = json_path.stem
43
- developer = data["model_info"]["developer"]
44
-
45
- # Validate evaluation results
46
- for eval_result in data["evaluation_results"]:
47
- if "evaluation_name" not in eval_result:
48
- raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].evaluation_name'")
49
- if "metric_config" not in eval_result:
50
- raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config'")
51
- if "score_details" not in eval_result:
52
- raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details'")
53
-
54
- if "lower_is_better" not in eval_result["metric_config"]:
55
- raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config.lower_is_better'")
56
- if "score" not in eval_result["score_details"]:
57
- raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details.score'")
58
-
59
- row = {
60
- # Folder structure (for reconstruction)
61
- "_leaderboard": leaderboard,
62
- "_developer": developer,
63
- "_model": model,
64
- "_uuid": uuid,
65
-
66
- # Required top-level fields
67
- "schema_version": data["schema_version"],
68
- "evaluation_id": data["evaluation_id"],
69
- "retrieved_timestamp": data["retrieved_timestamp"],
70
- "source_data": json.dumps(data["source_data"]),
71
-
72
- # Required nested fields
73
- "evaluation_source_name": data["evaluation_source"]["evaluation_source_name"],
74
- "evaluation_source_type": data["evaluation_source"]["evaluation_source_type"],
75
-
76
- "source_organization_name": data["source_metadata"]["source_organization_name"],
77
- "source_organization_url": data["source_metadata"].get("source_organization_url"),
78
- "source_organization_logo_url": data["source_metadata"].get("source_organization_logo_url"),
79
- "evaluator_relationship": data["source_metadata"]["evaluator_relationship"],
80
-
81
- "model_name": data["model_info"]["name"],
82
- "model_id": data["model_info"]["id"],
83
- "model_developer": data["model_info"]["developer"],
84
- "model_inference_platform": data["model_info"].get("inference_platform"),
85
-
86
- # Store full evaluation_results and additional_details as JSON
87
- "evaluation_results": json.dumps(data["evaluation_results"]),
88
- "additional_details": json.dumps(data["additional_details"]) if "additional_details" in data else None,
89
- }
90
-
91
- return row
92
-
93
-
94
- def add_to_parquet(json_input: str, parquet_file: str):
95
- """
96
- Add JSON(s) to Parquet file.
97
- Creates new file if it doesn't exist, appends and deduplicates if it does.
98
-
99
- Args:
100
- json_input: Path to single JSON file or folder containing JSONs
101
- parquet_file: Output Parquet file path
102
- """
103
- input_path = Path(json_input)
104
-
105
- if input_path.is_file():
106
- json_files = [input_path]
107
- elif input_path.is_dir():
108
- json_files = list(input_path.rglob("*.json"))
109
- else:
110
- raise ValueError(f"Invalid input: {json_input}")
111
-
112
- print(f"Processing {len(json_files)} JSON file(s)...")
113
-
114
- parquet_path = Path(parquet_file)
115
- if parquet_path.exists():
116
- existing_df = pd.read_parquet(parquet_file)
117
- existing_keys = set(
118
- existing_df[["_leaderboard", "_developer", "_model", "_uuid"]]
119
- .apply(tuple, axis=1)
120
- )
121
- print(f"Found {len(existing_df)} existing rows")
122
- else:
123
- existing_df = None
124
- existing_keys = set()
125
-
126
- all_rows = []
127
- skipped = 0
128
- for i, jf in enumerate(json_files, 1):
129
- if i % 100 == 0:
130
- print(f" {i}/{len(json_files)}")
131
-
132
- row = json_to_row(jf)
133
- key = (row["_leaderboard"], row["_developer"], row["_model"], row["_uuid"])
134
- if key not in existing_keys:
135
- all_rows.append(row)
136
- existing_keys.add(key)
137
- else:
138
- skipped += 1
139
-
140
- if skipped > 0:
141
- print(f" Skipped {skipped} duplicate file(s)")
142
-
143
- new_df = pd.DataFrame(all_rows)
144
-
145
- if existing_df is not None:
146
- df = pd.concat([existing_df, new_df], ignore_index=True)
147
- print(f"Added {len(new_df)} new file(s) to existing {len(existing_df)} file(s)")
148
- else:
149
- df = new_df
150
-
151
- df.to_parquet(parquet_file, index=False)
152
- print(f"Saved {len(df)} total file(s) to {parquet_file} ({parquet_path.stat().st_size / 1024 / 1024:.1f} MB)")
153
-
154
-
155
- def parquet_to_folder(parquet_file: str, output_dir: str):
156
- """Reconstruct folder structure from Parquet."""
157
- df = pd.read_parquet(parquet_file)
158
- out = Path(output_dir)
159
-
160
- for _, row in df.iterrows():
161
- lb = row["_leaderboard"]
162
- dev = row["_developer"]
163
- model = row["_model"]
164
- uuid = row["_uuid"]
165
-
166
- json_data = {
167
- "schema_version": row["schema_version"],
168
- "evaluation_id": row["evaluation_id"],
169
- "retrieved_timestamp": row["retrieved_timestamp"],
170
- "source_data": json.loads(row["source_data"]),
171
- "evaluation_source": {
172
- "evaluation_source_name": row["evaluation_source_name"],
173
- "evaluation_source_type": row["evaluation_source_type"]
174
- },
175
- "source_metadata": {
176
- "source_organization_name": row["source_organization_name"],
177
- "evaluator_relationship": row["evaluator_relationship"]
178
- },
179
- "model_info": {
180
- "name": row["model_name"],
181
- "id": row["model_id"],
182
- "developer": row["model_developer"]
183
- },
184
- "evaluation_results": json.loads(row["evaluation_results"])
185
- }
186
-
187
- if pd.notna(row["source_organization_url"]):
188
- json_data["source_metadata"]["source_organization_url"] = row["source_organization_url"]
189
- if pd.notna(row["source_organization_logo_url"]):
190
- json_data["source_metadata"]["source_organization_logo_url"] = row["source_organization_logo_url"]
191
-
192
- if pd.notna(row["model_inference_platform"]):
193
- json_data["model_info"]["inference_platform"] = row["model_inference_platform"]
194
-
195
- if pd.notna(row["additional_details"]):
196
- json_data["additional_details"] = json.loads(row["additional_details"])
197
-
198
- file_path = out / lb / dev / model / f"{uuid}.json"
199
- file_path.parent.mkdir(parents=True, exist_ok=True)
200
- with open(file_path, 'w') as f:
201
- json.dump(json_data, f, indent=2)
202
-
203
- print(f"Reconstructed {len(df)} files to {output_dir}")
204
-
205
-
206
- if __name__ == "__main__":
207
- import sys
208
-
209
- if len(sys.argv) < 2:
210
- print("Usage:")
211
- print(" python json_to_parquet.py add <json_or_folder> <output.parquet>")
212
- print(" python json_to_parquet.py export <input.parquet> <output_dir>")
213
- sys.exit(1)
214
-
215
- cmd = sys.argv[1]
216
-
217
- if cmd == "add":
218
- add_to_parquet(sys.argv[2], sys.argv[3])
219
- elif cmd == "export":
220
- parquet_to_folder(sys.argv[2], sys.argv[3])
221
- else:
222
- print(f"Unknown command: {cmd}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ui_components.py CHANGED
@@ -1,211 +1,838 @@
1
  """
2
  UI Components: Themes, CSS, and HTML formatters for the Gradio interface.
 
3
  """
4
  import gradio as gr
5
 
6
 
7
  def get_theme():
8
- """Returns the custom Gradio theme."""
9
- return gr.themes.Soft(
10
- primary_hue="slate",
11
  neutral_hue="slate",
12
- font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"]
 
13
  ).set(
14
- body_background_fill="var(--neutral-50)",
15
- block_background_fill="white",
 
 
 
 
 
 
16
  block_border_width="1px",
17
- block_title_text_weight="600"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  )
19
 
20
 
21
  def get_custom_css():
22
- """Returns custom CSS for the interface."""
23
  return """
24
- /* Clean up the global container */
 
 
 
 
 
 
 
25
  .gradio-container {
26
  max-width: 100% !important;
27
- padding: 0 2rem !important;
 
 
 
 
 
28
  }
29
 
30
- /* Hide file list in uploaders */
31
- .file-preview {
32
- display: none !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  }
34
 
35
- /* Ensure details elements work independently */
36
- details {
37
- position: relative;
38
- isolation: isolate;
39
  }
40
 
41
- details summary {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  cursor: pointer;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  }
44
  """
45
 
46
 
47
  def format_leaderboard_header(selected_leaderboard, metadata):
48
- """Formats the leaderboard header info (goes at top)."""
49
  if not selected_leaderboard:
50
  return """
51
- <div style="text-align: center; padding: 3rem; color: var(--body-text-color-subdued);">
52
- <h3>👋 Welcome to Eval Leaderboard</h3>
53
- <p>Select a leaderboard above to visualize results and metadata.</p>
54
  </div>
55
  """
56
 
57
  if not metadata or not metadata.get("evals"):
58
- return f"""<div style="padding: 1rem;">No metadata found for {selected_leaderboard}</div>"""
 
 
 
 
59
 
60
  source_info = metadata.get("source_info", {})
61
  org = source_info.get("organization", "Unknown")
62
- relationship = source_info.get("relationship", "Unknown").replace("_", " ").title()
63
  url = source_info.get("url", "#")
64
  eval_names = list(metadata["evals"].keys())
65
 
66
- # Create badges for evaluations
67
- eval_badges = "".join([f"""
68
- <span style="
69
- display: inline-block;
70
- padding: 0.25rem 0.75rem;
71
- margin: 0.25rem 0.25rem 0.25rem 0;
72
- background: var(--background-fill-primary);
73
- border: 1px solid var(--border-color-primary);
74
- border-radius: 16px;
75
- font-size: 0.8rem;
76
- color: var(--body-text-color);
77
- font-weight: 500;
78
- ">{name}</span>
79
- """ for name in eval_names])
80
 
81
  return f"""
82
- <div style="
83
- padding: 1.25rem;
84
- background: var(--background-fill-secondary);
85
- border-radius: 8px;
86
- border-left: 4px solid #667eea;
87
- ">
88
- <div style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 1rem;">
89
- <div style="flex: 1;">
90
- <h3 style="margin: 0 0 0.5rem 0; font-size: 1.2rem; font-weight: 600; color: var(--body-text-color);">
91
- {selected_leaderboard}
92
- </h3>
93
- <div style="font-size: 0.9rem; color: var(--body-text-color-subdued); margin-bottom: 0.75rem;">
94
- <span><strong>Source Organization:</strong> {org}</span> •
95
- <span><strong>Evaluator Relationship:</strong> {relationship}</span>
96
- </div>
97
- <div style="margin-top: 0.75rem;">
98
- <div style="font-size: 0.85rem; font-weight: 600; color: var(--body-text-color); margin-bottom: 0.5rem;">
99
- Included Evaluations:
100
- </div>
101
- <div>{eval_badges}</div>
102
- </div>
103
  </div>
104
  <a href="{url}" target="_blank" style="
105
- font-size: 0.85rem;
106
- color: var(--link-text-color);
107
  text-decoration: none;
108
- padding: 0.5rem 1rem;
 
109
  border-radius: 6px;
110
- background: var(--background-fill-primary);
111
- border: 1px solid var(--border-color-primary);
112
- transition: all 0.2s;
113
  white-space: nowrap;
114
- ">
115
- Source
116
- </a>
117
  </div>
118
  </div>
119
  """
120
 
121
 
122
  def format_metric_details(selected_leaderboard, metadata):
123
- """Formats metric detail cards (goes below table)."""
124
  if not selected_leaderboard or not metadata or not metadata.get("evals"):
125
  return ""
126
 
127
  evals = metadata.get("evals", {})
128
 
129
  html = """
130
- <h3 style="margin: 2rem 0 1rem 0; font-size: 1.1rem; font-weight: 600; color: var(--body-text-color);">
131
- 📏 Metric Details
132
- </h3>
133
-
134
- <div style="
135
- display: grid;
136
- grid-template-columns: repeat(auto-fill, minmax(350px, 1fr));
137
- gap: 1rem;
138
- ">
139
  """
140
 
141
  for eval_name, info in evals.items():
142
- score_type = info['score_type'].upper() if info.get('score_type') else "UNKNOWN"
143
  direction = "Lower is better" if info.get('lower_is_better') else "Higher is better"
144
- direction_icon = "↓" if info.get('lower_is_better') else "↑"
145
 
146
- details_content = ""
147
  if info.get('score_type') == "continuous" and info.get('min_score') is not None:
148
- details_content += f"<div><span style='opacity: 0.7;'>Range:</span> <strong>[{info['min_score']} - {info['max_score']}]</strong></div>"
149
  elif info.get('score_type') == "levels" and info.get('level_names'):
150
- levels = ", ".join(str(l) for l in info['level_names'])
151
- details_content += f"<div><span style='opacity: 0.7;'>Levels:</span> <strong>{levels}</strong></div>"
152
 
153
- if info.get('has_unknown_level'):
154
- details_content += "<div style='margin-top: 0.25rem; font-size: 0.8rem; opacity: 0.7;'>* -1 indicates Unknown</div>"
155
-
156
  html += f"""
157
- <details style="
158
- background: var(--background-fill-secondary);
159
- border: 1px solid var(--border-color-primary);
160
- border-radius: 6px;
161
- overflow: hidden;
162
- height: fit-content;
163
- ">
164
- <summary style="
165
- padding: 0.75rem 1rem;
166
- cursor: pointer;
167
- font-weight: 600;
168
- display: flex;
169
- align-items: center;
170
- justify-content: space-between;
171
- list-style: none;
172
- font-size: 0.95rem;
173
- ">
174
- <div style="display: flex; align-items: center; gap: 0.5rem;">
175
- <span style="font-size: 1.1rem; opacity: 0.8;">🏷️</span>
176
- <span style="white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{eval_name}</span>
177
- </div>
178
- <div style="display: flex; align-items: center; gap: 0.5rem;">
179
- <span style="font-size: 0.8rem; font-weight: 400; color: var(--body-text-color-subdued); white-space: nowrap;">{direction_icon} {direction}</span>
180
- </div>
181
  </summary>
182
-
183
- <div style="
184
- padding: 0.75rem 1rem;
185
- border-top: 1px solid var(--border-color-primary);
186
- background: var(--block-background-fill);
187
- font-size: 0.9rem;
188
- ">
189
- <p style="margin: 0 0 0.5rem 0; color: var(--body-text-color-subdued); line-height: 1.4;">
190
- {info['description']}
191
- </p>
192
- <div style="display: flex; justify-content: space-between; align-items: flex-end; margin-top: 0.5rem;">
193
- <div style="font-size: 0.85rem;">
194
- {details_content}
195
- </div>
196
- <span style="
197
- font-size: 0.7rem;
198
- padding: 1px 6px;
199
- border-radius: 4px;
200
- background: var(--background-fill-primary);
201
- border: 1px solid var(--border-color-primary);
202
- color: var(--body-text-color-subdued);
203
- ">{score_type}</span>
204
  </div>
205
  </div>
206
  </details>
207
  """
208
 
209
- html += "</div>"
210
  return html
211
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
  UI Components: Themes, CSS, and HTML formatters for the Gradio interface.
3
+ Nord color theme with balanced contrast.
4
  """
5
  import gradio as gr
6
 
7
 
8
  def get_theme():
9
+ """Returns the Nord-themed Gradio theme."""
10
+ return gr.themes.Base(
11
+ primary_hue="blue",
12
  neutral_hue="slate",
13
+ font=[gr.themes.GoogleFont("DM Sans"), "system-ui", "sans-serif"],
14
+ font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "monospace"],
15
  ).set(
16
+ body_background_fill="#2E3440",
17
+ body_background_fill_dark="#2E3440",
18
+ body_text_color="#ECEFF4",
19
+ body_text_color_dark="#ECEFF4",
20
+ body_text_color_subdued="#4C566A",
21
+ body_text_color_subdued_dark="#4C566A",
22
+ block_background_fill="#3B4252",
23
+ block_background_fill_dark="#3B4252",
24
  block_border_width="1px",
25
+ block_border_color="#434C5E",
26
+ block_border_color_dark="#434C5E",
27
+ block_label_text_color="#D8DEE9",
28
+ block_label_text_color_dark="#D8DEE9",
29
+ block_title_text_color="#ECEFF4",
30
+ block_title_text_color_dark="#ECEFF4",
31
+ input_background_fill="#2E3440",
32
+ input_background_fill_dark="#2E3440",
33
+ input_border_color="#4C566A",
34
+ input_border_color_dark="#4C566A",
35
+ button_primary_background_fill="#88C0D0",
36
+ button_primary_background_fill_dark="#88C0D0",
37
+ button_primary_text_color="#2E3440",
38
+ button_primary_text_color_dark="#2E3440",
39
+ button_secondary_background_fill="#434C5E",
40
+ button_secondary_background_fill_dark="#434C5E",
41
+ button_secondary_text_color="#ECEFF4",
42
+ button_secondary_text_color_dark="#ECEFF4",
43
  )
44
 
45
 
46
  def get_custom_css():
47
+ """Returns custom CSS with Nord colors."""
48
  return """
49
+ /* === Nord Theme ===
50
+ Polar Night: #2E3440 (bg), #3B4252 (surface), #434C5E, #4C566A
51
+ Snow Storm: #D8DEE9, #E5E9F0, #ECEFF4
52
+ Frost: #8FBCBB, #88C0D0, #81A1C1, #5E81AC
53
+ Aurora: #BF616A, #D08770, #EBCB8B, #A3BE8C, #B48EAD
54
+ */
55
+
56
+ /* === Base === */
57
  .gradio-container {
58
  max-width: 100% !important;
59
+ margin: 0 !important;
60
+ padding: 1.25rem 2.5rem 2rem !important;
61
+ background: #2E3440 !important;
62
+ color: #ECEFF4 !important;
63
+ font-family: 'DM Sans', system-ui, sans-serif !important;
64
+ font-size: 16px !important;
65
  }
66
 
67
+ /* === Header === */
68
+ .app-header {
69
+ display: flex;
70
+ align-items: center;
71
+ gap: 1rem;
72
+ margin-bottom: 1.5rem;
73
+ padding: 1.25rem 1.5rem;
74
+ background: #3B4252;
75
+ border: 1px solid #434C5E;
76
+ border-radius: 12px;
77
+ }
78
+
79
+ .app-header .logo-mark {
80
+ width: 48px;
81
+ height: 48px;
82
+ background: linear-gradient(135deg, #88C0D0 0%, #81A1C1 100%);
83
+ border-radius: 12px;
84
+ display: flex;
85
+ align-items: center;
86
+ justify-content: center;
87
+ font-weight: 800;
88
+ font-size: 1.1rem;
89
+ color: #2E3440;
90
+ }
91
+
92
+ .app-header .brand {
93
+ display: flex;
94
+ flex-direction: column;
95
+ gap: 0.125rem;
96
+ }
97
+
98
+ .app-header h1 {
99
+ margin: 0;
100
+ font-size: 1.5rem;
101
+ font-weight: 700;
102
+ color: #ECEFF4;
103
+ letter-spacing: -0.02em;
104
+ }
105
+
106
+ .app-header .tagline {
107
+ color: #D8DEE9;
108
+ font-size: 0.85rem;
109
+ }
110
+
111
+ .app-header .header-right {
112
+ margin-left: auto;
113
+ display: flex;
114
+ align-items: center;
115
+ gap: 0.75rem;
116
+ }
117
+
118
+ .app-header .version-badge {
119
+ background: rgba(136, 192, 208, 0.2);
120
+ border: 1px solid rgba(136, 192, 208, 0.4);
121
+ border-radius: 6px;
122
+ padding: 0.25rem 0.625rem;
123
+ font-size: 0.7rem;
124
+ font-family: 'JetBrains Mono', monospace;
125
+ color: #88C0D0;
126
+ }
127
+
128
+ /* === Tabs === */
129
+ .tabs {
130
+ border: none !important;
131
+ background: transparent !important;
132
+ }
133
+
134
+ .tab-nav {
135
+ background: #3B4252 !important;
136
+ border: 1px solid #434C5E !important;
137
+ border-radius: 10px !important;
138
+ padding: 0.25rem !important;
139
+ gap: 0.25rem !important;
140
+ margin-bottom: 1.25rem !important;
141
+ display: inline-flex !important;
142
+ }
143
+
144
+ .tab-nav button {
145
+ background: transparent !important;
146
+ border: none !important;
147
+ color: #D8DEE9 !important;
148
+ padding: 0.75rem 1.5rem !important;
149
+ font-size: 0.95rem !important;
150
+ font-weight: 500 !important;
151
+ border-radius: 8px !important;
152
+ transition: all 0.15s ease !important;
153
+ }
154
+
155
+ .tab-nav button.selected {
156
+ color: #2E3440 !important;
157
+ background: #88C0D0 !important;
158
+ }
159
+
160
+ .tab-nav button:hover:not(.selected) {
161
+ background: #434C5E !important;
162
+ color: #ECEFF4 !important;
163
+ }
164
+
165
+ .tabitem {
166
+ background: transparent !important;
167
+ border: none !important;
168
+ padding: 0 !important;
169
+ }
170
+
171
+ /* === Controls bar === */
172
+ .controls-bar {
173
+ background: #3B4252 !important;
174
+ border: 1px solid #434C5E !important;
175
+ border-radius: 10px !important;
176
+ padding: 0.75rem 1.25rem !important;
177
+ margin-bottom: 1rem !important;
178
+ gap: 0.75rem !important;
179
+ }
180
+
181
+ .controls-bar label {
182
+ font-size: 0.75rem !important;
183
+ text-transform: uppercase !important;
184
+ letter-spacing: 0.04em !important;
185
+ color: #D8DEE9 !important;
186
+ font-weight: 500 !important;
187
+ }
188
+
189
+ /* === Info banner === */
190
+ .info-banner {
191
+ background: #3B4252 !important;
192
+ border: 1px solid #434C5E !important;
193
+ border-left: 3px solid #88C0D0 !important;
194
+ border-radius: 0 10px 10px 0 !important;
195
+ padding: 0.75rem 1rem !important;
196
+ margin-bottom: 1rem !important;
197
+ }
198
+
199
+ .info-banner h3 {
200
+ margin: 0;
201
+ font-size: 1.1rem;
202
+ font-weight: 600;
203
+ color: #ECEFF4;
204
+ }
205
+
206
+ .info-banner .eval-tags {
207
+ display: flex;
208
+ flex-wrap: wrap;
209
+ gap: 0.375rem;
210
+ }
211
+
212
+ .info-banner .eval-tag {
213
+ background: rgba(143, 188, 187, 0.15);
214
+ border: 1px solid rgba(143, 188, 187, 0.3);
215
+ border-radius: 4px;
216
+ padding: 0.3rem 0.6rem;
217
+ font-size: 0.8rem;
218
+ font-family: 'JetBrains Mono', monospace;
219
+ color: #8FBCBB;
220
+ }
221
+
222
+ /* === Dataframe - seamless styling === */
223
+ .dataframe,
224
+ .dataframe > div,
225
+ .dataframe > div > div,
226
+ .dataframe .table-wrap,
227
+ .dataframe .svelte-1gfkn6j {
228
+ background: #2E3440 !important;
229
+ border: none !important;
230
+ box-shadow: none !important;
231
+ border-radius: 0 !important;
232
+ }
233
+
234
+ .dataframe table {
235
+ width: 100% !important;
236
+ border-collapse: collapse !important;
237
+ font-size: 0.95rem !important;
238
+ table-layout: fixed !important;
239
+ background: #2E3440 !important;
240
+ }
241
+
242
+ .dataframe thead th:nth-child(1) { width: 28%; }
243
+ .dataframe thead th:nth-child(2) { width: 12%; }
244
+ .dataframe thead th:nth-child(3) { width: 7%; }
245
+ .dataframe thead th:nth-child(4) { width: 7%; }
246
+ .dataframe thead th:nth-child(n+5) { width: auto; }
247
+
248
+ .dataframe thead,
249
+ .dataframe thead tr {
250
+ background: #2E3440 !important;
251
+ position: sticky;
252
+ top: 0;
253
+ z-index: 10;
254
+ }
255
+
256
+ .dataframe thead th {
257
+ padding: 0.875rem 1rem !important;
258
+ font-weight: 600 !important;
259
+ font-size: 0.75rem !important;
260
+ text-transform: uppercase !important;
261
+ letter-spacing: 0.05em !important;
262
+ color: #81A1C1 !important;
263
+ border-bottom: 1px solid #434C5E !important;
264
+ border-top: none !important;
265
+ text-align: left !important;
266
+ background: #2E3440 !important;
267
+ }
268
+
269
+ .dataframe tbody,
270
+ .dataframe tbody tr {
271
+ background: #2E3440 !important;
272
+ }
273
+
274
+ .dataframe tbody tr {
275
+ border-bottom: 1px solid #3B4252 !important;
276
+ }
277
+
278
+ .dataframe tbody tr:hover {
279
+ background: rgba(136, 192, 208, 0.04) !important;
280
+ }
281
+
282
+ .dataframe tbody td {
283
+ padding: 0.75rem 1rem !important;
284
+ color: #E5E9F0 !important;
285
+ background: #2E3440 !important;
286
+ overflow: hidden !important;
287
+ text-overflow: ellipsis !important;
288
+ border: none !important;
289
+ }
290
+
291
+ /* === Pagination bar === */
292
+ .pagination-bar {
293
+ margin-top: 1rem !important;
294
+ padding: 1rem 0 !important;
295
+ border-top: 1px solid #3B4252 !important;
296
+ display: flex !important;
297
+ justify-content: center !important;
298
+ align-items: center !important;
299
+ gap: 1rem !important;
300
+ }
301
+
302
+ .page-info {
303
+ font-family: 'JetBrains Mono', monospace !important;
304
+ font-size: 1rem !important;
305
+ color: #D8DEE9 !important;
306
+ min-width: 80px !important;
307
+ text-align: center !important;
308
+ }
309
+
310
+ /* Model name - white, readable */
311
+ .dataframe tbody td:first-child {
312
+ font-weight: 500 !important;
313
+ color: #ECEFF4 !important;
314
+ white-space: nowrap !important;
315
+ }
316
+
317
+ /* Developer - frost blue */
318
+ .dataframe tbody td:nth-child(2) {
319
+ color: #88C0D0 !important;
320
+ white-space: nowrap !important;
321
+ }
322
+
323
+ /* Params - aurora orange */
324
+ .dataframe tbody td:nth-child(3) {
325
+ font-family: 'JetBrains Mono', monospace !important;
326
+ color: #D08770 !important;
327
+ text-align: right !important;
328
+ }
329
+
330
+ /* Average - aurora green */
331
+ .dataframe tbody td:nth-child(4) {
332
+ font-family: 'JetBrains Mono', monospace !important;
333
+ font-weight: 600 !important;
334
+ color: #A3BE8C !important;
335
+ text-align: right !important;
336
+ }
337
+
338
+ /* Metrics - frost teal */
339
+ .dataframe tbody td:nth-child(n+5) {
340
+ font-family: 'JetBrains Mono', monospace !important;
341
+ text-align: right !important;
342
+ color: #8FBCBB !important;
343
+ white-space: nowrap !important;
344
+ }
345
+
346
+ /* === Status text === */
347
+ .status-text {
348
+ font-size: 0.9rem !important;
349
+ color: #D8DEE9 !important;
350
+ padding: 0.5rem 0 !important;
351
+ font-family: 'JetBrains Mono', monospace !important;
352
+ }
353
+
354
+ /* === Model Card === */
355
+ .model-card-container {
356
+ display: flex;
357
+ flex-direction: column;
358
+ gap: 1.25rem;
359
+ }
360
+
361
+ .model-card-header {
362
+ background: #3B4252;
363
+ border: 1px solid #434C5E;
364
+ border-radius: 12px;
365
+ padding: 1.5rem 2rem;
366
+ }
367
+
368
+ .model-card-header h2 {
369
+ margin: 0 0 0.5rem 0;
370
+ font-size: 1.5rem;
371
+ font-weight: 600;
372
+ color: #ECEFF4;
373
+ }
374
+
375
+ .model-card-header .model-meta {
376
+ display: flex;
377
+ gap: 1.5rem;
378
+ color: #D8DEE9;
379
+ font-size: 0.95rem;
380
+ }
381
+
382
+ .model-card-header .model-meta strong {
383
+ color: #8FBCBB;
384
+ }
385
+
386
+ .leaderboard-section {
387
+ background: #3B4252;
388
+ border: 1px solid #434C5E;
389
+ border-radius: 10px;
390
+ overflow: hidden;
391
+ }
392
+
393
+ .leaderboard-section-header {
394
+ background: #434C5E;
395
+ padding: 1rem 1.25rem;
396
+ border-bottom: 1px solid #4C566A;
397
+ display: flex;
398
+ justify-content: space-between;
399
+ align-items: center;
400
+ }
401
+
402
+ .leaderboard-section-header h3 {
403
+ margin: 0;
404
+ font-size: 1rem;
405
+ font-weight: 600;
406
+ color: #88C0D0;
407
+ }
408
+
409
+ .leaderboard-section-header .lb-avg {
410
+ background: rgba(163, 190, 140, 0.15);
411
+ border: 1px solid rgba(163, 190, 140, 0.3);
412
+ border-radius: 8px;
413
+ padding: 0.5rem 1rem;
414
+ font-size: 0.85rem;
415
+ color: #D8DEE9;
416
+ }
417
+
418
+ .leaderboard-section-header .lb-avg strong {
419
+ color: #A3BE8C;
420
+ font-family: 'JetBrains Mono', monospace;
421
+ font-size: 1.1rem;
422
+ font-weight: 700;
423
+ }
424
+
425
+ .scores-grid {
426
+ display: grid;
427
+ grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
428
+ gap: 1px;
429
+ background: #434C5E;
430
+ }
431
+
432
+ .score-item {
433
+ background: #3B4252;
434
+ padding: 1rem 1.25rem;
435
+ }
436
+
437
+ .score-item .score-label {
438
+ font-size: 0.8rem;
439
+ text-transform: uppercase;
440
+ letter-spacing: 0.05em;
441
+ color: #D8DEE9;
442
+ margin-bottom: 0.375rem;
443
+ }
444
+
445
+ .score-item .score-value {
446
+ font-size: 1.5rem;
447
+ font-weight: 600;
448
+ font-family: 'JetBrains Mono', monospace;
449
+ color: #A3BE8C;
450
+ }
451
+
452
+ .score-item.highlight .score-value {
453
+ color: #88C0D0;
454
  }
455
 
456
+ .no-results {
457
+ text-align: center;
458
+ padding: 3rem 1rem;
459
+ color: #D8DEE9;
460
  }
461
 
462
+ .no-results h3 {
463
+ color: #ECEFF4;
464
+ margin-bottom: 0.5rem;
465
+ }
466
+
467
+ /* === Buttons === */
468
+ button {
469
+ border-radius: 8px !important;
470
+ font-weight: 500 !important;
471
+ font-size: 0.95rem !important;
472
+ transition: all 0.15s ease !important;
473
+ }
474
+
475
+ button.primary {
476
+ background: #88C0D0 !important;
477
+ color: #2E3440 !important;
478
+ border: none !important;
479
+ }
480
+
481
+ button.primary:hover:not(:disabled) {
482
+ background: #8FBCBB !important;
483
+ }
484
+
485
+ button.secondary,
486
+ button[variant="secondary"] {
487
+ background: #434C5E !important;
488
+ color: #ECEFF4 !important;
489
+ border: 1px solid #4C566A !important;
490
+ }
491
+
492
+ button.secondary:hover:not(:disabled),
493
+ button[variant="secondary"]:hover:not(:disabled) {
494
+ background: #4C566A !important;
495
+ }
496
+
497
+ button:disabled {
498
+ opacity: 0.35 !important;
499
+ }
500
+
501
+ /* === Inputs === */
502
+ input[type="text"],
503
+ select {
504
+ background: #2E3440 !important;
505
+ border: 1px solid #4C566A !important;
506
+ border-radius: 8px !important;
507
+ color: #ECEFF4 !important;
508
+ font-size: 1rem !important;
509
+ }
510
+
511
+ input[type="text"]:focus,
512
+ select:focus {
513
+ border-color: #88C0D0 !important;
514
+ box-shadow: 0 0 0 3px rgba(136, 192, 208, 0.15) !important;
515
+ outline: none !important;
516
+ }
517
+
518
+ input::placeholder {
519
+ color: #4C566A !important;
520
+ }
521
+
522
+ /* === Accordion === */
523
+ .accordion {
524
+ background: #3B4252 !important;
525
+ border: 1px solid #434C5E !important;
526
+ border-radius: 10px !important;
527
+ margin-top: 1.5rem !important;
528
+ }
529
+
530
+ .accordion > .label-wrap {
531
+ background: transparent !important;
532
+ padding: 1rem 1.25rem !important;
533
+ color: #D8DEE9 !important;
534
+ font-size: 0.95rem !important;
535
+ }
536
+
537
+ .accordion > .wrap {
538
+ padding: 0.5rem 1.25rem 1.25rem !important;
539
+ color: #D8DEE9 !important;
540
+ font-size: 0.95rem !important;
541
+ line-height: 1.6 !important;
542
+ }
543
+
544
+ .accordion code {
545
+ background: #434C5E !important;
546
+ padding: 0.125rem 0.375rem !important;
547
+ border-radius: 4px !important;
548
+ font-family: 'JetBrains Mono', monospace !important;
549
+ font-size: 0.8rem !important;
550
+ color: #8FBCBB !important;
551
+ }
552
+
553
+ /* === Metrics section === */
554
+ .metrics-section {
555
+ margin-top: 1.5rem;
556
+ padding-top: 1.5rem;
557
+ border-top: 1px solid #434C5E;
558
+ }
559
+
560
+ .metrics-section h3 {
561
+ font-size: 0.85rem;
562
+ font-weight: 600;
563
+ color: #D8DEE9;
564
+ margin: 0 0 1rem 0;
565
+ text-transform: uppercase;
566
+ letter-spacing: 0.05em;
567
+ }
568
+
569
+ .metrics-grid {
570
+ display: grid;
571
+ grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
572
+ gap: 0.75rem;
573
+ }
574
+
575
+ .metric-card {
576
+ background: #3B4252;
577
+ border: 1px solid #434C5E;
578
+ border-radius: 8px;
579
+ overflow: hidden;
580
+ }
581
+
582
+ .metric-card-header {
583
+ display: flex;
584
+ justify-content: space-between;
585
+ align-items: center;
586
+ padding: 0.75rem 1rem;
587
  cursor: pointer;
588
+ list-style: none;
589
+ }
590
+
591
+ .metric-card-header::-webkit-details-marker {
592
+ display: none;
593
+ }
594
+
595
+ .metric-card-name {
596
+ font-weight: 500;
597
+ font-size: 0.95rem;
598
+ color: #ECEFF4;
599
+ }
600
+
601
+ .metric-card-direction {
602
+ font-size: 0.8rem;
603
+ color: #D8DEE9;
604
+ }
605
+
606
+ .metric-card-direction .arrow {
607
+ color: #A3BE8C;
608
+ font-weight: 600;
609
+ }
610
+
611
+ .metric-card-body {
612
+ padding: 0.875rem 1.25rem;
613
+ border-top: 1px solid #434C5E;
614
+ font-size: 0.9rem;
615
+ color: #D8DEE9;
616
+ line-height: 1.5;
617
+ }
618
+
619
+ .metric-type-badge {
620
+ font-size: 0.65rem;
621
+ text-transform: uppercase;
622
+ letter-spacing: 0.05em;
623
+ padding: 0.15rem 0.4rem;
624
+ background: rgba(180, 142, 173, 0.2);
625
+ border: 1px solid rgba(180, 142, 173, 0.35);
626
+ border-radius: 4px;
627
+ color: #B48EAD;
628
+ font-family: 'JetBrains Mono', monospace;
629
+ }
630
+
631
+ /* === Scrollbar === */
632
+ ::-webkit-scrollbar {
633
+ width: 8px;
634
+ height: 8px;
635
+ }
636
+
637
+ ::-webkit-scrollbar-track {
638
+ background: #2E3440;
639
+ }
640
+
641
+ ::-webkit-scrollbar-thumb {
642
+ background: #4C566A;
643
+ border-radius: 4px;
644
+ }
645
+
646
+ ::-webkit-scrollbar-thumb:hover {
647
+ background: #5E81AC;
648
+ }
649
+
650
+ /* === Responsive === */
651
+ @media (max-width: 768px) {
652
+ .gradio-container {
653
+ padding: 1rem !important;
654
+ }
655
+
656
+ .scores-grid {
657
+ grid-template-columns: repeat(2, 1fr);
658
+ }
659
+ }
660
+
661
+ /* === Overrides === */
662
+ .gradio-container footer {
663
+ display: none !important;
664
+ }
665
+
666
+ .block {
667
+ background: #3B4252 !important;
668
+ }
669
+
670
+ .gradio-radio label {
671
+ background: #434C5E !important;
672
+ border: 1px solid #4C566A !important;
673
+ color: #ECEFF4 !important;
674
+ border-radius: 8px !important;
675
+ font-size: 0.85rem !important;
676
+ }
677
+
678
+ .gradio-radio label.selected {
679
+ background: #88C0D0 !important;
680
+ border-color: #88C0D0 !important;
681
+ color: #2E3440 !important;
682
  }
683
  """
684
 
685
 
686
def format_leaderboard_header(selected_leaderboard, metadata):
    """Render the HTML info banner for the currently selected leaderboard.

    Three cases: no selection yields a centered prompt; a selection without
    eval metadata yields a name-only banner; otherwise the full banner with
    organisation, one tag per eval, and a link to the original source.
    """
    # Guard: nothing selected yet — prompt the user instead of a banner.
    if not selected_leaderboard:
        return """
        <div style="text-align: center; padding: 2rem 1rem; color: #D8DEE9;">
            <div style="font-size: 1.1rem;">Select a leaderboard to explore</div>
        </div>
        """

    # Guard: metadata missing or has no evals — minimal name-only banner.
    if not metadata or not metadata.get("evals"):
        return f"""
        <div class="info-banner">
            <h3>{selected_leaderboard}</h3>
        </div>
        """

    src = metadata.get("source_info", {})
    organization = src.get("organization", "Unknown")
    link = src.get("url", "#")

    # One pill-style tag per eval name in the leaderboard.
    tags_html = "".join(
        f'<span class="eval-tag">{name}</span>' for name in metadata["evals"]
    )

    return f"""
    <div class="info-banner">
        <div style="display: flex; justify-content: space-between; align-items: center; gap: 1rem;">
            <div style="display: flex; align-items: center; gap: 1rem; flex-wrap: wrap;">
                <h3 style="margin: 0;">{selected_leaderboard}</h3>
                <span style="color: #D8DEE9; font-size: 0.8rem;">by {organization}</span>
                <div class="eval-tags" style="margin: 0;">{tags_html}</div>
            </div>
            <a href="{link}" target="_blank" style="
                font-size: 0.75rem;
                color: #88C0D0;
                text-decoration: none;
                padding: 0.375rem 0.75rem;
                border: 1px solid rgba(136, 192, 208, 0.4);
                border-radius: 6px;
                white-space: nowrap;
            ">Source →</a>
        </div>
    </div>
    """
731
def format_metric_details(selected_leaderboard, metadata):
    """Render collapsible metric-reference cards for a leaderboard.

    Produces one `<details>` card per eval in ``metadata["evals"]`` showing
    the metric's direction, description, score range (or level names), and
    score-type badge. Returns "" when there is no selection or no evals.
    """
    if not selected_leaderboard or not metadata or not metadata.get("evals"):
        return ""

    evals = metadata.get("evals", {})

    html = """
    <div class="metrics-section">
        <h3>Metric Reference</h3>
        <div class="metrics-grid">
    """

    for eval_name, info in evals.items():
        score_type = info['score_type'].upper() if info.get('score_type') else ""
        direction = "Lower is better" if info.get('lower_is_better') else "Higher is better"
        arrow = "↓" if info.get('lower_is_better') else "↑"

        details = ""
        # Bug fix: guard max_score too — previously a continuous metric with
        # min_score set but max_score absent raised KeyError (and a None
        # max_score rendered as the literal "None").
        if (
            info.get('score_type') == "continuous"
            and info.get('min_score') is not None
            and info.get('max_score') is not None
        ):
            details = f"Range: [{info['min_score']} {info['max_score']}]"
        elif info.get('score_type') == "levels" and info.get('level_names'):
            details = f"Levels: {', '.join(str(l) for l in info['level_names'])}"

        html += f"""
        <details class="metric-card">
            <summary class="metric-card-header">
                <span class="metric-card-name">{eval_name}</span>
                <span class="metric-card-direction"><span class="arrow">{arrow}</span> {direction}</span>
            </summary>
            <div class="metric-card-body">
                <div>{info.get('description', 'No description')}</div>
                <div style="display: flex; justify-content: space-between; align-items: center; margin-top: 0.5rem;">
                    <span style="font-size: 0.75rem; color: #D8DEE9;">{details}</span>
                    <span class="metric-type-badge">{score_type}</span>
                </div>
            </div>
        </details>
        """

    html += "</div></div>"
    return html
774
+
775
def format_model_card(model_name, model_data):
    """Render an HTML card summarising one model's results across leaderboards.

    ``model_data`` maps leaderboard name -> dict with optional keys
    ``developer``, ``params``, ``architecture`` and a ``results`` dict of
    metric name -> score (floats or None). Header metadata is taken from the
    first leaderboard entry. Returns a "no results" placeholder when
    ``model_data`` is empty.
    """
    if not model_data:
        return """
        <div class="no-results">
            <h3>No results found</h3>
            <p>Try searching for a different model name</p>
        </div>
        """

    # Header metadata comes from the first leaderboard entry
    # (assumed consistent across leaderboards for a given model).
    first = next(iter(model_data.values()))
    developer = first.get("developer", "Unknown")
    params = first.get("params")
    arch = first.get("architecture", "Unknown")

    params_str = f"{params}B" if params else "—"

    html = f"""
    <div class="model-card-container">
        <div class="model-card-header">
            <h2>{model_name}</h2>
            <div class="model-meta">
                <span><strong>Developer:</strong> {developer}</span>
                <span><strong>Parameters:</strong> {params_str}</span>
                <span><strong>Architecture:</strong> {arch}</span>
            </div>
        </div>
    """

    for leaderboard_name, data in model_data.items():
        results = data.get("results", {})
        if not results:
            continue

        scores = [v for v in results.values() if v is not None]
        avg = sum(scores) / len(scores) if scores else None
        # Bug fix: compare against None, not truthiness — a legitimate
        # average of 0.0 previously rendered as "—" instead of "0.000".
        avg_str = f"{avg:.3f}" if avg is not None else "—"

        html += f"""
        <div class="leaderboard-section">
            <div class="leaderboard-section-header">
                <h3>{leaderboard_name}</h3>
                <span class="lb-avg">Avg: <strong>{avg_str}</strong></span>
            </div>
            <div class="scores-grid">
        """

        # Highest score first; None scores sort as 0.
        sorted_results = sorted(
            results.items(),
            key=lambda kv: kv[1] if kv[1] is not None else 0,
            reverse=True,
        )

        for i, (metric_name, score) in enumerate(sorted_results):
            score_display = f"{score:.3f}" if score is not None else "—"
            # Only the top-sorted metric gets the highlight styling.
            highlight_class = "highlight" if i == 0 else ""

            html += f"""
            <div class="score-item {highlight_class}">
                <div class="score-label">{metric_name}</div>
                <div class="score-value">{score_display}</div>
            </div>
            """

        html += "</div></div>"

    html += "</div>"
    return html
upload_to_hf.py DELETED
@@ -1,122 +0,0 @@
1
- """
2
- Upload changed parquet files to HuggingFace dataset.
3
-
4
- This script:
5
- 1. Reads the manifest of changed leaderboards
6
- 2. Uploads ONLY the changed parquet files
7
- 3. Uses HfApi for efficient individual file uploads
8
-
9
- Usage:
10
- # With HF_TOKEN environment variable (GitHub Actions):
11
- python upload_to_hf.py
12
-
13
- # Interactive login (local):
14
- python upload_to_hf.py --login
15
- """
16
-
17
- from huggingface_hub import login, HfFolder, HfApi
18
- import pandas as pd
19
- from pathlib import Path
20
- import sys
21
- import os
22
- import json
23
-
24
# Target dataset repo; overridable via the HF_DATASET_REPO env var (the
# GitHub Actions workflow sets it explicitly).
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "deepmage121/eee_test")
# Directory where the conversion step writes its parquet files and manifest.
PARQUET_DIR = Path("parquet_output")
MANIFEST_PATH = PARQUET_DIR / "changed_leaderboards.json"

def upload_changed_parquets():
    """
    Upload only changed parquet files from manifest.

    Authenticates via the HF_TOKEN env var, an interactive ``--login`` flag,
    or a previously saved token; reads the ``converted`` list from the
    manifest written by the conversion step; uploads one parquet per changed
    leaderboard to ``data/<name>/data-00000-of-00001.parquet`` in the
    dataset repo, one commit per file.

    Exit codes: 1 on missing auth, missing manifest, no files on disk, or
    any upload error; 0 when the manifest lists no changes or all uploads
    succeed.
    """

    # --- Authentication: env token > interactive login > cached token ---
    hf_token = os.environ.get("HF_TOKEN")
    if hf_token:
        print("Using HF_TOKEN from environment")
        # Persist the token so HfApi picks it up implicitly below.
        HfFolder.save_token(hf_token)
    elif "--login" in sys.argv:
        print("Logging in to HuggingFace...")
        login()
    else:
        if not HfFolder.get_token():
            print("ERROR: Not logged in. Run with --login flag or set HF_TOKEN environment variable")
            sys.exit(1)
        print("Using existing HuggingFace token")

    api = HfApi()

    # --- Read the manifest of leaderboards changed by the convert step ---
    if not MANIFEST_PATH.exists():
        print(f"ERROR: No manifest found at {MANIFEST_PATH}")
        print("Run convert_changed_to_parquet.py first to generate the manifest")
        sys.exit(1)

    manifest = json.loads(MANIFEST_PATH.read_text())
    converted_leaderboards = manifest.get("converted", [])

    # An empty "converted" list is a normal no-op run, not an error.
    if not converted_leaderboards:
        print("\nNo changed leaderboards to upload (per manifest)")
        sys.exit(0)

    print(f"\nManifest found: {len(converted_leaderboards)} leaderboard(s) to upload")

    # Map each leaderboard name to its expected parquet path...
    files_to_upload = [
        PARQUET_DIR / f"{lb}.parquet"
        for lb in converted_leaderboards
    ]

    # ...and silently drop entries whose parquet was never written.
    files_to_upload = [f for f in files_to_upload if f.exists()]

    if not files_to_upload:
        print(f"ERROR: No parquet files to upload in {PARQUET_DIR}")
        sys.exit(1)

    print(f"\nUploading {len(files_to_upload)} parquet file(s):")
    for pf in files_to_upload:
        print(f" - {pf.stem}")

    uploaded_count = 0
    error_count = 0

    # --- Upload loop: one commit per changed leaderboard ---
    for parquet_file in files_to_upload:
        leaderboard_name = parquet_file.stem

        # Fixed shard name matches the layout the datasets library expects.
        path_in_repo = f"data/{leaderboard_name}/data-00000-of-00001.parquet"

        try:
            print(f"\nUploading: {leaderboard_name}")

            # Read back only to log shape; also acts as a sanity check that
            # the parquet is readable before pushing it.
            df = pd.read_parquet(parquet_file)
            print(f" {len(df)} rows, {len(df.columns)} columns")

            api.upload_file(
                path_or_fileobj=str(parquet_file),
                path_in_repo=path_in_repo,
                repo_id=HF_DATASET_REPO,
                repo_type="dataset",
                commit_message=f"Update {leaderboard_name} leaderboard data"
            )

            print(f" SUCCESS: Uploaded → {path_in_repo}")
            uploaded_count += 1

        except Exception as e:
            # Keep going so one bad file doesn't block the rest; the
            # non-zero exit below still fails the CI job.
            print(f" ERROR: Error uploading {leaderboard_name}: {e}")
            error_count += 1

    print(f"\n{'='*70}")
    print(f"Upload Summary:")
    print(f"{'='*70}")
    print(f" Successfully uploaded: {uploaded_count} file(s)")
    print(f" Errors: {error_count} file(s)")
    print(f"{'='*70}")

    # Any failure makes the whole run fail so CI surfaces it.
    if error_count > 0:
        print(f"\nWARNING: {error_count} file(s) failed to upload")
        sys.exit(1)

    print(f"\nSuccessfully uploaded to HuggingFace!")
    print(f"View at: https://huggingface.co/datasets/{HF_DATASET_REPO}")


if __name__ == "__main__":
    upload_changed_parquets()