deepmage121 commited on
Commit
d0ab546
·
1 Parent(s): 49c1354

initial commit, space + other info related to action

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .github/workflows/sync-to-hf.yml +55 -0
  2. .gitignore +8 -0
  3. .python-version +1 -0
  4. app.py +127 -531
  5. data_loader.py +317 -0
  6. eval.schema.json +282 -0
  7. hf_operations.py +202 -0
  8. json_to_parquet.py +228 -0
  9. leaderboard_data/HFOpenLLMv2/0-hero/0-hero_Matter-0.2-7B-DPO/40e80d5e-db72-46b7-bd14-b7d005df4be8.json +0 -107
  10. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-32K/0d91a153-1b6b-4891-8722-a5c7e372ba64.json +0 -107
  11. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat-16K/2192007d-1f6e-4f74-b518-7448ef3a896e.json +0 -107
  12. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat/e335874b-9b3e-4966-a7e0-22e9d16f8324.json +0 -107
  13. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B/8409c158-ef12-4e6c-8a1d-7be2084b3446.json +0 -107
  14. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B-Chat/3452e57f-3023-4e2e-ad84-b09e409fe334.json +0 -107
  15. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B/1a1f1263-96b6-4e32-a2c8-6c0d6b47dff9.json +0 -107
  16. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-32K/df9d9d44-daa1-4e61-9b46-192380043889.json +0 -107
  17. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat-16K/090c9691-4b7e-4a98-b9a2-644e21797be4.json +0 -107
  18. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat/9256c32b-d956-418f-97da-ea78e3ad9e48.json +0 -107
  19. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B/904d1f91-3153-49d5-afd3-9921bfc086f1.json +0 -107
  20. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-200K/fb2ebd9a-f5b8-42a2-9b58-e6f0e7d9b98a.json +0 -107
  21. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-Chat/5d9b9217-874b-426d-8af4-5105a3b1b3ad.json +0 -107
  22. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B/3ebcbf3d-cb2d-4332-bb8a-1db104033391.json +0 -107
  23. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-200K/6b720e8b-aab8-4ba4-9bce-e7a1de3cfb86.json +0 -107
  24. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-Chat/1120c801-7736-4d9d-b23d-08eeedb34186.json +0 -107
  25. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B/297419fa-855c-4eae-ad7c-3cf4a0262450.json +0 -107
  26. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B-200K/4299df04-495a-4687-b143-96b1b562d5e8.json +0 -107
  27. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B/0ec59add-f9a9-4dbd-8a83-c6aec0b8ad21.json +0 -107
  28. leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-Coder-9B-Chat/ef0cc3a5-0d62-4a45-b0c7-28a6f7dfdac4.json +0 -107
  29. leaderboard_data/HFOpenLLMv2/1-800-LLMs/1-800-LLMs_Qwen-2.5-14B-Hindi-Custom-Instruct/a48b0864-76b7-4860-a448-942a8d74f68e.json +0 -107
  30. leaderboard_data/HFOpenLLMv2/152334H/152334H_miqu-1-70b-sf/f57d7b8d-85d5-4e0b-8dec-31e2931487dd.json +0 -107
  31. leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-7B-v0.1/1347cd1b-2ebc-4223-900f-7c2479e228a3.json +0 -107
  32. leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-v0.1/b2926dd6-628c-4274-b0e8-1efc64269bb2.json +0 -107
  33. leaderboard_data/HFOpenLLMv2/3rd-Degree-Burn/3rd-Degree-Burn_L-3.1-Science-Writer-8B/0c4fd071-b5c9-4bf1-a1d5-d658be1a3258.json +0 -107
  34. leaderboard_data/HFOpenLLMv2/4season/4season_final_model_test_v2/74973e37-cd82-4e8a-816a-02b035fabff4.json +0 -107
  35. leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-Instruct-preview/3766e8a0-99ad-4733-a01b-ced446b15eda.json +0 -107
  36. leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-SFT-preview/342ac912-805f-4166-b8f4-10f0503fa892.json +0 -107
  37. leaderboard_data/HFOpenLLMv2/AGI-0/AGI-0_Art-v0-3B/162b6d5f-f983-4989-9603-f6baea26b633.json +0 -107
  38. leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json +0 -107
  39. leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-TIR/0ffa78d4-fe45-4639-bcd1-eb19ab168a35.json +0 -107
  40. leaderboard_data/HFOpenLLMv2/AI-Sweden-Models/AI-Sweden-Models_Llama-3-8B-instruct/1d68bd2e-de6e-4327-a8f1-33322eba537e.json +0 -107
  41. leaderboard_data/HFOpenLLMv2/AI4free/AI4free_Dhanishtha/a554a3eb-943c-4135-966b-929129ef025d.json +0 -107
  42. leaderboard_data/HFOpenLLMv2/AI4free/AI4free_t2/332ccdb5-faf5-47c6-afeb-a91d2148adf0.json +0 -107
  43. leaderboard_data/HFOpenLLMv2/AIDC-AI/AIDC-AI_Marco-o1/17f7398f-675d-4b38-b233-64fc106737c3.json +0 -107
  44. leaderboard_data/HFOpenLLMv2/Aashraf995/Aashraf995_Creative-7B-nerd/7ea9f4db-5b52-40a5-904e-785e43302934.json +0 -107
  45. leaderboard_data/HFOpenLLMv2/AbacusResearch/AbacusResearch_Jallabi-34B/76397277-901a-4ad0-9dae-0351ca875ec6.json +0 -107
  46. leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_StructuredThinker-v0.3-MoreStructure/81a5aafb-2cf7-490d-b619-ce638fcc8b38.json +0 -107
  47. leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_Test_StealthThinker/43c907eb-3e43-47ff-b38d-f912ba6ef46c.json +0 -107
  48. leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0-Coder/48732edf-8baf-438e-8a5c-763eee6c0c18.json +0 -107
  49. leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0/38f169f0-e939-4b12-8f78-b2a27fb90de0.json +0 -107
  50. leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V1.1/e8c63728-a1f5-432f-bf9f-204b0f4041aa.json +0 -107
.github/workflows/sync-to-hf.yml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to HuggingFace Dataset
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ paths:
7
+ - 'data/**/*.json'
8
+ workflow_dispatch: # Allow manual trigger
9
+
10
+ jobs:
11
+ sync-to-huggingface:
12
+ runs-on: ubuntu-latest
13
+
14
+ steps:
15
+ - name: Checkout repository
16
+ uses: actions/checkout@v4
17
+ with:
18
+ fetch-depth: 2
19
+
20
+ - name: Set up Python
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: '3.11'
24
+
25
+ - name: Install dependencies
26
+ run: |
27
+ pip install datasets huggingface_hub pandas pyarrow
28
+
29
+ - name: Convert Changed JSONs to Parquet (Optimized)
30
+ env:
31
+ HF_DATASET_REPO: deepmage121/eee_test
32
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
33
+ run: |
34
+ echo "Detecting changed leaderboards..."
35
+ python scripts/convert_to_parquet.py
36
+
37
+ - name: Upload Changed Parquets to HuggingFace
38
+ env:
39
+ HF_DATASET_REPO: deepmage121/eee_test
40
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
41
+ run: |
42
+ echo "Uploading changed parquets..."
43
+ python scripts/upload_to_hf.py
44
+
45
+ - name: Report status
46
+ if: success()
47
+ run: |
48
+ echo "Successfully synced to HuggingFace dataset"
49
+ echo "View at: https://huggingface.co/datasets/deepmage121/eee_test"
50
+ if [ -f parquet_output/changed_leaderboards.json ]; then
51
+ echo ""
52
+ echo "Changes processed:"
53
+ cat parquet_output/changed_leaderboards.json
54
+ fi
55
+
.gitignore CHANGED
@@ -1 +1,9 @@
1
  .DS_Store
 
 
 
 
 
 
 
 
 
1
  .DS_Store
2
+ .secrets
3
+ .actrc
4
+ __pycache__/
5
+ *.pyc
6
+ parquet_output/
7
+ *.venv*
8
+ *.md
9
+ *.ipynb_checkpoints
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11
app.py CHANGED
@@ -1,479 +1,106 @@
 
 
 
 
1
  import gradio as gr
2
  import pandas as pd
3
- import json
4
  from pathlib import Path
5
 
6
- DATA_DIR = Path("leaderboard_data")
7
- LEADERBOARD_CACHE = {}
8
-
9
- def parse_eval_json(file_path):
10
- """Parses a single JSON file to extract model, provider, and results."""
11
- try:
12
- with open(file_path, 'r') as f:
13
- data = json.load(f)
14
-
15
- leaderboard_name = data.get("evaluation_source", {}).get("evaluation_source_name", "Unknown Leaderboard")
16
- provider_name = data.get("source_metadata", {}).get("source_organization_name", "Unknown Provider")
17
- model_id = data.get("model_info", {}).get("id", "Unknown Model")
18
- developer_name = data.get("model_info", {}).get("developer", "Unknown Developer")
19
-
20
- params = data.get("model_info", {}).get("params_billions", None)
21
- architecture = data.get("model_info", {}).get("architecture", "Unknown")
22
- precision = data.get("additional_details", {}).get("precision", "Unknown")
23
- if precision == "Unknown":
24
- precision = data.get("model_info", {}).get("precision", "Unknown")
25
-
26
- results = {}
27
- if "evaluation_results" in data:
28
- for res in data["evaluation_results"]:
29
- eval_name = res.get("evaluation_name", "Unknown Metric")
30
- score = res.get("score_details", {}).get("score", None)
31
- if score is not None:
32
- results[eval_name] = score
33
-
34
- return {
35
- "leaderboard": leaderboard_name,
36
- "provider": provider_name,
37
- "model": model_id,
38
- "developer": developer_name,
39
- "params": params,
40
- "architecture": architecture,
41
- "precision": precision,
42
- "results": results,
43
- "raw_data": data
44
- }
45
- except Exception as e:
46
- print(f"Error parsing {file_path}: {e}")
47
- return None
48
-
49
- def get_available_leaderboards():
50
- """Scans data directory for leaderboard folders."""
51
- if not DATA_DIR.exists():
52
- return []
53
- return [d.name for d in DATA_DIR.iterdir() if d.is_dir()]
54
-
55
- def normalize_leaderboard_name(name):
56
- """Normalizes leaderboard name to remove spaces."""
57
- return name.replace(" ", "")
58
-
59
- def sanitize_filename_component(name):
60
- """Sanitizes a name to be safe for use in directory names."""
61
- return name.replace("/", "_").replace("\\", "_").replace(":", "_").strip()
62
-
63
- def walk_eval_files(leaderboard_name):
64
- """Generator that walks through Leaderboard directory recursively."""
65
- lb_path = DATA_DIR / leaderboard_name
66
- if not lb_path.exists():
67
- return
68
-
69
- yield from lb_path.rglob("*.json")
70
 
71
- def get_eval_metadata(selected_leaderboard):
72
- """Extracts evaluation metadata from the leaderboard data."""
73
- if not selected_leaderboard:
74
- return {}
75
-
76
- eval_metadata = {"evals": {}, "source_info": {}}
77
-
78
- for json_file in walk_eval_files(selected_leaderboard):
79
- parsed = parse_eval_json(json_file)
80
- if parsed:
81
- if not eval_metadata["source_info"]:
82
- source_meta = parsed["raw_data"].get("source_metadata", {})
83
- source_data_list = parsed["raw_data"].get("source_data", [])
84
- url = source_data_list[0] if isinstance(source_data_list, list) and source_data_list else "#"
85
-
86
- eval_metadata["source_info"] = {
87
- "organization": source_meta.get("source_organization_name", "Unknown"),
88
- "relationship": source_meta.get("evaluator_relationship", "Unknown"),
89
- "url": url
90
- }
91
-
92
- if "evaluation_results" in parsed["raw_data"]:
93
- for res in parsed["raw_data"]["evaluation_results"]:
94
- eval_name = res.get("evaluation_name", "Unknown Metric")
95
- if eval_name not in eval_metadata["evals"]:
96
- metric_config = res.get("metric_config", {})
97
- eval_metadata["evals"][eval_name] = {
98
- "description": metric_config.get("evaluation_description", "No description available"),
99
- "score_type": metric_config.get("score_type", "unknown"),
100
- "lower_is_better": metric_config.get("lower_is_better", False),
101
- "min_score": metric_config.get("min_score"),
102
- "max_score": metric_config.get("max_score"),
103
- "level_names": metric_config.get("level_names", []),
104
- "level_metadata": metric_config.get("level_metadata", []),
105
- "has_unknown_level": metric_config.get("has_unknown_level", False)
106
- }
107
- break
108
-
109
- return eval_metadata
110
 
111
- def format_eval_info_html(selected_leaderboard):
112
- """Formats evaluation metadata into a responsive HTML grid."""
113
  if not selected_leaderboard:
114
- return """
115
- <div style="text-align: center; padding: 3rem; color: var(--body-text-color-subdued);">
116
- <h3>👋 Welcome to Eval Leaderboard</h3>
117
- <p>Select a leaderboard above to visualize results and metadata.</p>
118
- </div>
119
- """
120
-
121
- metadata = get_eval_metadata(selected_leaderboard)
122
- if not metadata or not metadata.get("evals"):
123
- return f"""<div style="padding: 1rem;">No metadata found for {selected_leaderboard}</div>"""
124
-
125
- source_info = metadata.get("source_info", {})
126
- evals = metadata.get("evals", {})
127
- unique_evals_count = len(evals)
128
-
129
- eval_badges = "".join([
130
- f'<span style="background: var(--background-fill-secondary); border: 1px solid var(--border-color-primary); padding: 2px 8px; border-radius: 4px; font-size: 0.85rem; white-space: nowrap;">{name}</span>'
131
- for name in sorted(evals.keys())
132
- ])
133
-
134
- source_url = source_info.get('url', '#')
135
- source_link = f'<a href="{source_url}" target="_blank" style="text-decoration: none; color: var(--link-text-color); hover: underline;">🔗 {source_info.get("organization", "Unknown")}</a>'
136
-
137
- html = f"""
138
- <div style="
139
- background: var(--block-background-fill);
140
- border: 1px solid var(--border-color-primary);
141
- border-radius: 8px;
142
- padding: 1.5rem;
143
- margin-bottom: 2rem;
144
- box-shadow: var(--shadow-sm);
145
- ">
146
- <h2 style="margin-top: 0; margin-bottom: 1rem;">📊 {selected_leaderboard}</h2>
147
- <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1.5rem;">
148
- <div>
149
- <div style="font-size: 0.85rem; color: var(--body-text-color-subdued); text-transform: uppercase; letter-spacing: 0.05em; font-weight: 600;">Source Organization</div>
150
- <div style="font-size: 1.1rem; font-weight: 500;">{source_link}</div>
151
- </div>
152
- <div>
153
- <div style="font-size: 0.85rem; color: var(--body-text-color-subdued); text-transform: uppercase; letter-spacing: 0.05em; font-weight: 600;">Evaluator Relationship</div>
154
- <div style="font-size: 1.1rem; font-weight: 500;">{source_info.get('relationship', 'Unknown').replace('_', ' ').title()}</div>
155
- </div>
156
- <div>
157
- <div style="font-size: 0.85rem; color: var(--body-text-color-subdued); text-transform: uppercase; letter-spacing: 0.05em; font-weight: 600; margin-bottom: 0.5rem;">Included Evaluations</div>
158
- <div style="display: flex; flex-wrap: wrap; gap: 0.5rem;">{eval_badges}</div>
159
- </div>
160
- </div>
161
- </div>
162
-
163
- <h3 style="margin-bottom: 1rem;">Metric Details</h3>
164
- """
165
 
166
- html += """
167
- <div style="
168
- display: grid;
169
- grid-template-columns: repeat(auto-fill, minmax(350px, 1fr));
170
- gap: 1rem;
171
- ">
172
- """
173
 
174
- for eval_name, info in evals.items():
175
- score_type = info['score_type'].upper() if info['score_type'] else "UNKNOWN"
176
- direction = "Lower is better" if info['lower_is_better'] else "Higher is better"
177
- direction_icon = "↓" if info['lower_is_better'] else "↑"
178
 
179
- details_content = ""
180
- if info['score_type'] == "continuous" and info.get('min_score') is not None:
181
- details_content += f"<div><span style='opacity: 0.7;'>Range:</span> <strong>[{info['min_score']} - {info['max_score']}]</strong></div>"
182
- elif info['score_type'] == "levels" and info.get('level_names'):
183
- levels = ", ".join(info['level_names'])
184
- details_content += f"<div><span style='opacity: 0.7;'>Levels:</span> <strong>{levels}</strong></div>"
185
 
186
- if info.get('has_unknown_level'):
187
- details_content += "<div style='margin-top: 0.25rem; font-size: 0.8rem; opacity: 0.7;'>* -1 indicates Unknown</div>"
188
-
189
- html += f"""
190
- <details style="
191
- background: var(--background-fill-secondary);
192
- border: 1px solid var(--border-color-primary);
193
- border-radius: 6px;
194
- overflow: hidden;
195
- height: fit-content;
196
- ">
197
- <summary style="
198
- padding: 0.75rem 1rem;
199
- cursor: pointer;
200
- font-weight: 600;
201
- display: flex;
202
- align-items: center;
203
- justify-content: space-between;
204
- list-style: none;
205
- font-size: 0.95rem;
206
- ">
207
- <div style="display: flex; align-items: center; gap: 0.5rem;">
208
- <span style="font-size: 1.1rem; opacity: 0.8;">🏷️</span>
209
- <span style="white-space: nowrap; overflow: hidden; text-overflow: ellipsis;">{eval_name}</span>
210
- </div>
211
- <div style="display: flex; align-items: center; gap: 0.5rem;">
212
- <span style="font-size: 0.8rem; font-weight: 400; color: var(--body-text-color-subdued); white-space: nowrap;">{direction_icon} {direction}</span>
213
- </div>
214
- </summary>
215
 
216
- <div style="
217
- padding: 0.75rem 1rem;
218
- border-top: 1px solid var(--border-color-primary);
219
- background: var(--block-background-fill);
220
- font-size: 0.9rem;
221
- ">
222
- <p style="margin: 0 0 0.5rem 0; color: var(--body-text-color-subdued); line-height: 1.4;">
223
- {info['description']}
224
- </p>
225
- <div style="display: flex; justify-content: space-between; align-items: flex-end; margin-top: 0.5rem;">
226
- <div style="font-size: 0.85rem;">
227
- {details_content}
228
- </div>
229
- <span style="
230
- font-size: 0.7rem;
231
- padding: 1px 6px;
232
- border-radius: 4px;
233
- background: var(--background-fill-primary);
234
- border: 1px solid var(--border-color-primary);
235
- color: var(--body-text-color-subdued);
236
- ">{score_type}</span>
237
- </div>
238
- </div>
239
- </details>
240
- """
241
-
242
- html += "</div>"
243
- return html
244
 
245
- def update_leaderboard_table(selected_leaderboard, search_query="", group_by_model=False, progress=gr.Progress()):
246
  """Loads and aggregates data for the selected leaderboard."""
247
  if not selected_leaderboard:
248
- return pd.DataFrame(), format_eval_info_html(None)
249
 
250
- # Check cache
251
- full_df = None
252
- if selected_leaderboard in LEADERBOARD_CACHE:
253
- # Cache stores (df, meta_html)
254
- full_df, meta_html = LEADERBOARD_CACHE[selected_leaderboard]
255
- else:
256
- progress(0, desc=f"Scanning {selected_leaderboard}...")
257
- all_files = list(walk_eval_files(selected_leaderboard))
258
- total_files = len(all_files)
259
-
260
- rows = []
261
- for i, json_file in enumerate(all_files):
262
- if i % 100 == 0:
263
- progress((i / total_files), desc=f"Loading {selected_leaderboard}...")
264
- parsed = parse_eval_json(json_file)
265
- if parsed:
266
- row = {
267
- "Model": parsed["model"],
268
- "Developer": parsed["developer"],
269
- "Params (B)": parsed["params"],
270
- "Arch": parsed["architecture"],
271
- "Precision": parsed["precision"]
272
- }
273
- row.update(parsed["results"])
274
- rows.append(row)
275
-
276
- meta_html = format_eval_info_html(selected_leaderboard)
277
-
278
- if not rows:
279
- full_df = pd.DataFrame(columns=["Model", "Developer", "Params (B)", "Arch", "Precision", "Score"])
280
- else:
281
- full_df = pd.DataFrame(rows)
282
- numeric_cols = full_df.select_dtypes(include=['float', 'int']).columns
283
- full_df[numeric_cols] = full_df[numeric_cols].round(3)
284
-
285
- LEADERBOARD_CACHE[selected_leaderboard] = (full_df, meta_html)
286
-
287
- # Filter by search query
288
- df = full_df.copy()
289
- if search_query:
290
- df = df[
291
- df["Model"].str.contains(search_query, case=False, na=False) |
292
- df["Developer"].str.contains(search_query, case=False, na=False)
293
- ]
294
-
295
- # Group by model and average scores if requested
296
- if group_by_model and not df.empty:
297
- # Identify grouping columns (non-numeric usually, or specific base cols)
298
- # We group by the base identifiers.
299
- base_cols_all = ["Model", "Developer", "Params (B)", "Arch", "Precision"]
300
- group_cols = [c for c in base_cols_all if c in df.columns]
301
-
302
- # Identify columns to average (numeric)
303
- numeric_cols = df.select_dtypes(include=['number']).columns
304
- # Exclude group_cols from numeric_cols if they happen to be numeric (like Params)
305
- # But groupby keys can be numeric.
306
- # We want to average the SCORES.
307
- # Any numeric column NOT in group_cols should be averaged.
308
- agg_cols = [c for c in numeric_cols if c not in group_cols]
309
-
310
- if group_cols and agg_cols:
311
- df = df.groupby(group_cols)[agg_cols].mean().reset_index()
312
- df = df.round(3)
313
-
314
- # Drop columns where all values are null
315
- df = df.dropna(axis=1, how='all')
316
-
317
- if df.empty:
318
- return df, meta_html
319
-
320
- # Filter base_cols to only include columns that exist in df (in case some were dropped)
321
- base_cols = [c for c in ["Model", "Developer", "Params (B)", "Arch", "Precision"] if c in df.columns]
322
- eval_cols = [c for c in df.columns if c not in base_cols]
323
-
324
- cols = base_cols + eval_cols
325
- return df[cols], meta_html
326
-
327
- def find_json_files(path):
328
- """Recursively finds all JSON files in a directory or returns the file if it's a JSON file."""
329
- json_files = []
330
- path_obj = Path(path)
331
-
332
- if path_obj.is_file() and path_obj.suffix == ".json":
333
- json_files.append(path_obj)
334
- elif path_obj.is_dir():
335
- json_files.extend(path_obj.rglob("*.json"))
336
-
337
- return json_files
338
-
339
- def check_is_duplicate(save_dir, new_eval_id):
340
- """Checks if a file with the same evaluation_id already exists in the directory."""
341
- if not new_eval_id or not save_dir.exists():
342
- return False
343
-
344
- for existing_file in save_dir.glob("*.json"):
345
- try:
346
- with open(existing_file, 'r') as f:
347
- data = json.load(f)
348
- if data.get("evaluation_id") == new_eval_id:
349
- return True
350
- except:
351
- continue
352
- return False
353
-
354
- def handle_file_upload(files, progress=gr.Progress()):
355
- """Processes uploaded files/folders and saves them to the correct structure.
356
 
357
- Structure: Leaderboard/Provider/Model/<uuid>.json
358
- Preserves original filename (which already contains the UUID).
359
- """
360
- if not files:
361
- return gr.update(), "No files uploaded."
362
 
363
- saved_count = 0
364
- all_json_files = []
365
- skipped_count = 0
366
- duplicate_count = 0
367
 
368
- progress(0, desc="Scanning files...")
369
- for file_obj in files:
370
- path = file_obj.name if hasattr(file_obj, "name") else file_obj
371
- json_files = find_json_files(path)
372
-
373
- if Path(path).is_file() and Path(path).suffix != ".json":
374
- skipped_count += 1
375
-
376
- all_json_files.extend(json_files)
377
 
378
- total_files = len(all_json_files)
379
- for i, json_file in enumerate(all_json_files):
380
- progress((i / total_files), desc=f"Processing {json_file.name}...")
381
- try:
382
- parsed = parse_eval_json(json_file)
383
- if not parsed:
384
- continue
385
-
386
- leaderboard = normalize_leaderboard_name(parsed["leaderboard"])
387
- provider = parsed["provider"]
388
- model_id = parsed["model"]
389
- developer = parsed["developer"]
390
- eval_id = parsed["raw_data"].get("evaluation_id")
391
-
392
- # Sanitize names for directory structure
393
- sanitized_provider = sanitize_filename_component(developer)
394
- sanitized_model = sanitize_filename_component(model_id)
395
-
396
- # Create structure: Leaderboard/Developer/Model
397
- save_dir = DATA_DIR / leaderboard / sanitized_provider / sanitized_model
398
- save_dir.mkdir(parents=True, exist_ok=True)
399
-
400
- # Check for duplicates based on evaluation_id
401
- if check_is_duplicate(save_dir, eval_id):
402
- duplicate_count += 1
403
- continue
404
-
405
- # Preserve original filename
406
- filename = json_file.name
407
- save_path = save_dir / filename
408
-
409
- # Avoid overwriting by appending counter
410
- counter = 1
411
- while save_path.exists():
412
- stem = save_path.stem.rsplit('_', 1)[0] if '_' in save_path.stem else save_path.stem
413
- save_path = save_dir / f"{stem}_{counter}.json"
414
- counter += 1
415
-
416
- with open(save_path, 'w') as f:
417
- json.dump(parsed["raw_data"], f, indent=2)
418
-
419
- saved_count += 1
420
-
421
- except Exception as e:
422
- print(f"Failed to save {json_file}: {e}")
423
-
424
- # Clear cache since data changed
425
- LEADERBOARD_CACHE.clear()
426
-
427
- # Refresh leaderboard choices
428
- choices = get_available_leaderboards()
429
 
430
- msg_parts = [f"Processed {saved_count} files."]
431
- if duplicate_count > 0:
432
- msg_parts.append(f"Skipped {duplicate_count} duplicates.")
433
- if skipped_count > 0:
434
- msg_parts.append(f"Skipped {skipped_count} non-JSON files.")
435
-
436
- return gr.Dropdown(choices=choices), " ".join(msg_parts), None, None
437
 
438
- # Professional, high-contrast theme
439
- theme = gr.themes.Soft(
440
- primary_hue="slate",
441
- neutral_hue="slate",
442
- font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"]
443
- ).set(
444
- body_background_fill="var(--neutral-50)",
445
- block_background_fill="white",
446
- block_border_width="1px",
447
- block_title_text_weight="600"
448
- )
449
-
450
- css = """
451
- /* Clean up the global container */
452
- .gradio-container {
453
- max-width: 100% !important;
454
- padding: 0 2rem !important;
455
- }
456
-
457
- /* Table Styles */
458
- .dataframe {
459
- border: 1px solid var(--border-color-primary) !important;
460
- border-radius: 8px;
461
- }
462
 
463
- /* Hide file list in uploaders */
464
- .file-preview {
465
- display: none !important;
466
- }
467
- """
468
 
469
- with gr.Blocks(title="Eval Leaderboard", theme=theme, css=css) as demo:
 
470
 
471
  with gr.Row(variant="compact", elem_classes="header-row"):
472
  with gr.Column(scale=1):
473
  gr.Markdown("# 🏆 Evaluation Leaderboard")
474
  gr.Markdown("Analyze and compare model performance metrics.", elem_classes="subtitle")
475
 
476
- with gr.Row(variant="panel", equal_height=True):
477
  initial_choices = get_available_leaderboards()
478
  initial_value = initial_choices[0] if initial_choices else None
479
 
@@ -482,56 +109,51 @@ with gr.Blocks(title="Eval Leaderboard", theme=theme, css=css) as demo:
482
  choices=initial_choices,
483
  value=initial_value,
484
  label="Current Leaderboard",
485
- interactive=True,
486
- container=False,
487
- scale=1
488
- )
489
- with gr.Column(scale=2):
490
- search_box = gr.Textbox(
491
- label="Search Model/Developer",
492
- placeholder="🔍 Search model or developer...",
493
- show_label=False,
494
- container=False,
495
- scale=1
496
- )
497
- with gr.Column(scale=1, min_width=100):
498
- group_by_model = gr.Checkbox(
499
- label="Average by Model",
500
- value=False,
501
- container=False
502
  )
503
- with gr.Column(scale=1, min_width=100):
 
 
 
 
 
 
504
  refresh_btn = gr.Button("🔄 Refresh", variant="secondary", size="sm")
505
 
506
- with gr.Accordion("📤 Upload New Data", open=False):
507
- upload_mode = gr.Radio(
508
- choices=["Files", "Folder"],
509
- value="Files",
510
- label="Upload Mode",
511
- info="Choose 'Files' for individual JSONs, or 'Folder' to upload a directory structure."
512
- )
513
-
514
- with gr.Group(visible=True) as file_upload_group:
515
- file_uploader_files = gr.File(
516
- file_count="multiple",
517
- file_types=[".json"],
518
- label="Select JSON Files"
519
- )
520
-
521
- with gr.Group(visible=False) as folder_upload_group:
522
- file_uploader_folder = gr.File(
523
- file_count="directory",
524
- label="Select Folder"
525
- )
526
-
527
- upload_status = gr.Textbox(
528
- label="Upload Status",
529
- interactive=False
530
- )
531
 
532
- init_df, init_meta = update_leaderboard_table(initial_value)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
533
 
534
- metadata_view = gr.HTML(value=init_meta)
535
 
536
  leaderboard_table = gr.Dataframe(
537
  value=init_df,
@@ -541,59 +163,33 @@ with gr.Blocks(title="Eval Leaderboard", theme=theme, css=css) as demo:
541
  elem_classes="dataframe"
542
  )
543
 
544
- def toggle_upload_input(mode):
545
- return {
546
- file_upload_group: gr.Group(visible=(mode == "Files")),
547
- folder_upload_group: gr.Group(visible=(mode == "Folder"))
548
- }
549
 
550
- upload_mode.change(
551
- fn=toggle_upload_input,
552
- inputs=[upload_mode],
553
- outputs=[file_upload_group, folder_upload_group]
554
- )
555
-
556
- file_uploader_files.upload(
557
- fn=handle_file_upload,
558
- inputs=[file_uploader_files],
559
- outputs=[leaderboard_selector, upload_status, file_uploader_files, file_uploader_folder]
560
- )
561
-
562
- file_uploader_folder.upload(
563
- fn=handle_file_upload,
564
- inputs=[file_uploader_folder],
565
- outputs=[leaderboard_selector, upload_status, file_uploader_files, file_uploader_folder]
566
- )
567
 
 
568
  leaderboard_selector.change(
569
  fn=update_leaderboard_table,
570
- inputs=[leaderboard_selector, search_box, group_by_model],
571
- outputs=[leaderboard_table, metadata_view]
572
- )
573
-
574
- search_box.change(
575
- fn=update_leaderboard_table,
576
- inputs=[leaderboard_selector, search_box, group_by_model],
577
- outputs=[leaderboard_table, metadata_view]
578
  )
579
 
580
- group_by_model.change(
581
- fn=update_leaderboard_table,
582
- inputs=[leaderboard_selector, search_box, group_by_model],
583
- outputs=[leaderboard_table, metadata_view]
584
  )
585
 
586
  refresh_btn.click(
587
- fn=lambda: (gr.Dropdown(choices=get_available_leaderboards()), "Refreshed."),
588
- outputs=[leaderboard_selector, upload_status]
589
  ).then(
590
- fn=lambda: LEADERBOARD_CACHE.clear()
591
  ).then(
592
  fn=update_leaderboard_table,
593
- inputs=[leaderboard_selector, search_box, group_by_model],
594
- outputs=[leaderboard_table, metadata_view]
595
  )
596
-
597
  DATA_DIR.mkdir(exist_ok=True)
598
 
599
  if __name__ == "__main__":
 
1
+ """
2
+ Evaluation Leaderboard - Gradio Interface
3
+ Displays model evaluation results from HuggingFace datasets.
4
+ """
5
  import gradio as gr
6
  import pandas as pd
 
7
  from pathlib import Path
8
 
9
+ # Import custom modules
10
+ from data_loader import (
11
+ load_hf_dataset_on_startup,
12
+ get_available_leaderboards,
13
+ get_eval_metadata,
14
+ build_leaderboard_table,
15
+ clear_cache,
16
+ DATA_DIR
17
+ )
18
+ from ui_components import get_theme, get_custom_css, format_leaderboard_header, format_metric_details
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ def export_leaderboard_to_json(selected_leaderboard):
22
+ """Export current leaderboard to JSON files in a zip using parquet_to_folder."""
23
  if not selected_leaderboard:
24
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
+ import tempfile
27
+ import shutil
28
+ import zipfile
29
+ from json_to_parquet import parquet_to_folder
 
 
 
30
 
31
+ try:
32
+ # Find the parquet file in DATA_DIR
33
+ parquet_path = DATA_DIR / selected_leaderboard / f"{selected_leaderboard}.parquet"
 
34
 
35
+ if not parquet_path.exists():
36
+ print(f"Parquet file not found: {parquet_path}")
37
+ return None
 
 
 
38
 
39
+ # Create temp directory for export
40
+ with tempfile.TemporaryDirectory() as temp_dir:
41
+ temp_path = Path(temp_dir)
42
+ output_dir = temp_path / "json_export"
43
+ output_dir.mkdir()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
+ # Use the round-trip functionality from json_to_parquet
46
+ parquet_to_folder(str(parquet_path), str(output_dir))
47
+
48
+ # Create zip file
49
+ zip_path = temp_path / f"{selected_leaderboard}_export.zip"
50
+ with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
51
+ for json_file in output_dir.rglob("*.json"):
52
+ arcname = json_file.relative_to(output_dir)
53
+ zipf.write(json_file, arcname)
54
+
55
+ # Copy to a permanent location for download
56
+ final_zip = Path(tempfile.gettempdir()) / f"{selected_leaderboard}_export.zip"
57
+ shutil.copy(zip_path, final_zip)
58
+
59
+ return str(final_zip)
60
+ except Exception as e:
61
+ print(f"Export error: {e}")
62
+ return None
63
+
 
 
 
 
 
 
 
 
 
64
 
65
+ def update_leaderboard_table(selected_leaderboard, search_query="", progress=gr.Progress()):
66
  """Loads and aggregates data for the selected leaderboard."""
67
  if not selected_leaderboard:
68
+ return pd.DataFrame(), "", format_leaderboard_header(None, {}), format_metric_details(None, {})
69
 
70
+ metadata = get_eval_metadata(selected_leaderboard)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
+ def progress_callback(value, desc):
73
+ progress(value, desc=desc)
 
 
 
74
 
75
+ df = build_leaderboard_table(selected_leaderboard, "", progress_callback)
76
+ total_count = len(df)
 
 
77
 
78
+ # Apply search filter (searches all columns)
79
+ if search_query and not df.empty:
80
+ mask = df.astype(str).apply(lambda row: row.str.contains(search_query, case=False, na=False).any(), axis=1)
81
+ df = df[mask]
 
 
 
 
 
82
 
83
+ # Build search status message
84
+ if search_query:
85
+ search_msg = f"Showing {len(df)} of {total_count} results for '{search_query}'"
86
+ else:
87
+ search_msg = f"Showing {len(df)} results"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
+ return df, search_msg, format_leaderboard_header(selected_leaderboard, metadata), format_metric_details(selected_leaderboard, metadata)
 
 
 
 
 
 
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
+ # Load HF dataset BEFORE building the interface
93
+ load_hf_dataset_on_startup()
 
 
 
94
 
95
+ # Build Gradio interface
96
+ with gr.Blocks(title="Eval Leaderboard", theme=get_theme(), css=get_custom_css()) as demo:
97
 
98
  with gr.Row(variant="compact", elem_classes="header-row"):
99
  with gr.Column(scale=1):
100
  gr.Markdown("# 🏆 Evaluation Leaderboard")
101
  gr.Markdown("Analyze and compare model performance metrics.", elem_classes="subtitle")
102
 
103
+ with gr.Row(variant="panel"):
104
  initial_choices = get_available_leaderboards()
105
  initial_value = initial_choices[0] if initial_choices else None
106
 
 
109
  choices=initial_choices,
110
  value=initial_value,
111
  label="Current Leaderboard",
112
+ interactive=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  )
114
+ with gr.Column(scale=3):
115
+ search_box = gr.Textbox(
116
+ label="Search",
117
+ placeholder="Type to search across all columns...",
118
+ show_label=False
119
+ )
120
+ with gr.Column(scale=1):
121
  refresh_btn = gr.Button("🔄 Refresh", variant="secondary", size="sm")
122
 
123
+ with gr.Accordion("ℹ️ How to Submit Data", open=False):
124
+ gr.Markdown("""
125
+ ### Submitting Evaluation Data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
+ **Data submissions happen via GitHub Pull Requests:**
128
+
129
+ 1. **Fork** [evaleval/every_eval_ever](https://github.com/evaleval/every_eval_ever)
130
+ 2. **Add your JSON files** to `data/<leaderboard>/<developer>/<model>/`
131
+ 3. **Create a Pull Request**
132
+ 4. **Automated validation** checks your data
133
+ 5. **After merge**: GitHub Actions automatically syncs to HuggingFace
134
+ 6. **Refresh this page** to see your data!
135
+
136
+ #### File Structure
137
+ ```
138
+ data/
139
+ └── YourBenchmark/
140
+ └── developer_name/
141
+ └── model_name/
142
+ └── {uuid}.json
143
+ ```
144
+
145
+ Each JSON file should follow the schema and be named with a unique UUID.
146
+
147
+ 📖 [**Full Submission Guide**](https://github.com/evaleval/every_eval_ever#contributor-guide) |
148
+ 📋 [**JSON Schema**](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json) |
149
+ 👀 [**See Examples**](https://github.com/evaleval/every_eval_ever/tree/main/data)
150
+ """)
151
+
152
+ init_df, init_search_msg, init_header, init_metrics = update_leaderboard_table(initial_value)
153
+
154
+ header_view = gr.HTML(value=init_header)
155
 
156
+ search_info = gr.Markdown(value=init_search_msg)
157
 
158
  leaderboard_table = gr.Dataframe(
159
  value=init_df,
 
163
  elem_classes="dataframe"
164
  )
165
 
166
+ metrics_view = gr.HTML(value=init_metrics)
 
 
 
 
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
+ # Event handlers
170
  leaderboard_selector.change(
171
  fn=update_leaderboard_table,
172
+ inputs=[leaderboard_selector, search_box],
173
+ outputs=[leaderboard_table, search_info, header_view, metrics_view]
 
 
 
 
 
 
174
  )
175
 
176
+ search_box.input(
177
+ fn=update_leaderboard_table,
178
+ inputs=[leaderboard_selector, search_box],
179
+ outputs=[leaderboard_table, search_info, header_view, metrics_view]
180
  )
181
 
182
  refresh_btn.click(
183
+ fn=lambda: gr.Dropdown(choices=get_available_leaderboards()),
184
+ outputs=[leaderboard_selector]
185
  ).then(
186
+ fn=lambda: clear_cache()
187
  ).then(
188
  fn=update_leaderboard_table,
189
+ inputs=[leaderboard_selector, search_box],
190
+ outputs=[leaderboard_table, search_info, header_view, metrics_view]
191
  )
192
+
193
  DATA_DIR.mkdir(exist_ok=True)
194
 
195
  if __name__ == "__main__":
data_loader.py ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Data Loader: Load from HuggingFace, parse JSON files, and build tables.
3
+ """
4
+ import json
5
+ import pandas as pd
6
+ from pathlib import Path
7
+ from datasets import load_dataset
8
+
9
+
10
+ # Global caches
11
+ HF_DATASET_CACHE = {}
12
+ LEADERBOARD_CACHE = {}
13
+ DATA_DIR = Path("leaderboard_data")
14
+
15
+
16
+ def load_hf_dataset_on_startup():
17
+ """Load all splits from HuggingFace dataset at startup."""
18
+ print("Loading dataset from HuggingFace...")
19
+ try:
20
+ dataset = load_dataset("deepmage121/eee_test")
21
+
22
+ for split_name, split_data in dataset.items():
23
+ print(f"Loading split: {split_name} ({len(split_data)} rows)")
24
+
25
+ df = split_data.to_pandas()
26
+ parsed_items = []
27
+
28
+ for _, row in df.iterrows():
29
+ evaluation_results = json.loads(row['evaluation_results'])
30
+
31
+ results = {}
32
+ for eval_result in evaluation_results:
33
+ eval_name = eval_result.get("evaluation_name")
34
+ score = eval_result.get("score_details", {}).get("score")
35
+ if eval_name and score is not None:
36
+ results[eval_name] = score
37
+
38
+ additional_details = {}
39
+ if pd.notna(row.get('additional_details')):
40
+ additional_details = json.loads(row['additional_details'])
41
+
42
+ parsed_item = {
43
+ "leaderboard": row['_leaderboard'],
44
+ "provider": row['source_organization_name'],
45
+ "model": row['model_id'],
46
+ "developer": row['model_developer'],
47
+ "params": additional_details.get('params_billions'),
48
+ "architecture": additional_details.get('architecture', 'Unknown'),
49
+ "precision": additional_details.get('precision', 'Unknown'),
50
+ "results": results,
51
+ "raw_data": {
52
+ "schema_version": row['schema_version'],
53
+ "evaluation_id": row['evaluation_id'],
54
+ "retrieved_timestamp": row['retrieved_timestamp'],
55
+ "source_data": json.loads(row['source_data']),
56
+ "evaluation_source": {
57
+ "evaluation_source_name": row['evaluation_source_name'],
58
+ "evaluation_source_type": row['evaluation_source_type']
59
+ },
60
+ "source_metadata": {
61
+ "source_organization_name": row['source_organization_name'],
62
+ "evaluator_relationship": row['evaluator_relationship'],
63
+ },
64
+ "model_info": {
65
+ "name": row['model_name'],
66
+ "id": row['model_id'],
67
+ "developer": row['model_developer'],
68
+ },
69
+ "evaluation_results": evaluation_results,
70
+ "additional_details": additional_details
71
+ }
72
+ }
73
+
74
+ if pd.notna(row.get('source_organization_url')):
75
+ parsed_item["raw_data"]["source_metadata"]["source_organization_url"] = row['source_organization_url']
76
+ if pd.notna(row.get('source_organization_logo_url')):
77
+ parsed_item["raw_data"]["source_metadata"]["source_organization_logo_url"] = row['source_organization_logo_url']
78
+ if pd.notna(row.get('model_inference_platform')):
79
+ parsed_item["raw_data"]["model_info"]["inference_platform"] = row['model_inference_platform']
80
+
81
+ parsed_items.append(parsed_item)
82
+
83
+ HF_DATASET_CACHE[split_name] = parsed_items
84
+
85
+ print(f"Loaded {len(HF_DATASET_CACHE)} leaderboard(s) from HuggingFace")
86
+ return True
87
+ except Exception as e:
88
+ print(f"Warning: Could not load HuggingFace dataset: {e}")
89
+ print("Falling back to local file system...")
90
+ return False
91
+
92
+
93
+ def parse_eval_json(file_path):
94
+ """Parses a single JSON file to extract model, provider, and results."""
95
+ try:
96
+ with open(file_path, 'r') as f:
97
+ data = json.load(f)
98
+
99
+ leaderboard_name = data.get("evaluation_source", {}).get("evaluation_source_name", "Unknown Leaderboard")
100
+ provider_name = data.get("source_metadata", {}).get("source_organization_name", "Unknown Provider")
101
+ model_id = data.get("model_info", {}).get("id", "Unknown Model")
102
+ developer_name = data.get("model_info", {}).get("developer", "Unknown Developer")
103
+
104
+ params = data.get("model_info", {}).get("params_billions", None)
105
+ architecture = data.get("model_info", {}).get("architecture", "Unknown")
106
+ precision = data.get("additional_details", {}).get("precision", "Unknown")
107
+ if precision == "Unknown":
108
+ precision = data.get("model_info", {}).get("precision", "Unknown")
109
+
110
+ results = {}
111
+ if "evaluation_results" in data:
112
+ for res in data["evaluation_results"]:
113
+ eval_name = res.get("evaluation_name", "Unknown Metric")
114
+ score = res.get("score_details", {}).get("score", None)
115
+ if score is not None:
116
+ results[eval_name] = score
117
+
118
+ return {
119
+ "leaderboard": leaderboard_name,
120
+ "provider": provider_name,
121
+ "model": model_id,
122
+ "developer": developer_name,
123
+ "params": params,
124
+ "architecture": architecture,
125
+ "precision": precision,
126
+ "results": results,
127
+ "raw_data": data
128
+ }
129
+ except Exception as e:
130
+ print(f"Error parsing {file_path}: {e}")
131
+ return None
132
+
133
+
134
+ def get_available_leaderboards():
135
+ """Returns available leaderboards from HF cache or local directory."""
136
+ if HF_DATASET_CACHE:
137
+ return list(HF_DATASET_CACHE.keys())
138
+
139
+ if not DATA_DIR.exists():
140
+ return []
141
+ return [d.name for d in DATA_DIR.iterdir() if d.is_dir()]
142
+
143
+
144
+ def walk_eval_files(leaderboard_name):
145
+ """Generator that walks through Leaderboard directory recursively."""
146
+ lb_path = DATA_DIR / leaderboard_name
147
+ if not lb_path.exists():
148
+ return
149
+ yield from lb_path.rglob("*.json")
150
+
151
+
152
+ def get_eval_metadata(selected_leaderboard):
153
+ """Extracts evaluation metadata from the leaderboard data."""
154
+ if not selected_leaderboard:
155
+ return {}
156
+
157
+ eval_metadata = {"evals": {}, "source_info": {}}
158
+
159
+ if selected_leaderboard in HF_DATASET_CACHE:
160
+ parsed_items = HF_DATASET_CACHE[selected_leaderboard]
161
+ if parsed_items:
162
+ parsed = parsed_items[0]
163
+
164
+ source_meta = parsed["raw_data"].get("source_metadata", {})
165
+ source_data_list = parsed["raw_data"].get("source_data", [])
166
+ url = source_data_list[0] if isinstance(source_data_list, list) and source_data_list else "#"
167
+
168
+ eval_metadata["source_info"] = {
169
+ "organization": source_meta.get("source_organization_name", "Unknown"),
170
+ "relationship": source_meta.get("evaluator_relationship", "Unknown"),
171
+ "url": url
172
+ }
173
+
174
+ if "evaluation_results" in parsed["raw_data"]:
175
+ for res in parsed["raw_data"]["evaluation_results"]:
176
+ eval_name = res.get("evaluation_name", "Unknown Metric")
177
+ if eval_name not in eval_metadata["evals"]:
178
+ metric_config = res.get("metric_config", {})
179
+ eval_metadata["evals"][eval_name] = {
180
+ "description": metric_config.get("evaluation_description", "No description available"),
181
+ "score_type": metric_config.get("score_type", "unknown"),
182
+ "lower_is_better": metric_config.get("lower_is_better", False),
183
+ "min_score": metric_config.get("min_score"),
184
+ "max_score": metric_config.get("max_score"),
185
+ "level_names": metric_config.get("level_names", []),
186
+ "level_metadata": metric_config.get("level_metadata", []),
187
+ "has_unknown_level": metric_config.get("has_unknown_level", False)
188
+ }
189
+ return eval_metadata
190
+
191
+ # Fall back to file system
192
+ for json_file in walk_eval_files(selected_leaderboard):
193
+ parsed = parse_eval_json(json_file)
194
+ if parsed:
195
+ if not eval_metadata["source_info"]:
196
+ source_meta = parsed["raw_data"].get("source_metadata", {})
197
+ source_data_list = parsed["raw_data"].get("source_data", [])
198
+ url = source_data_list[0] if isinstance(source_data_list, list) and source_data_list else "#"
199
+
200
+ eval_metadata["source_info"] = {
201
+ "organization": source_meta.get("source_organization_name", "Unknown"),
202
+ "relationship": source_meta.get("evaluator_relationship", "Unknown"),
203
+ "url": url
204
+ }
205
+
206
+ if "evaluation_results" in parsed["raw_data"]:
207
+ for res in parsed["raw_data"]["evaluation_results"]:
208
+ eval_name = res.get("evaluation_name", "Unknown Metric")
209
+ if eval_name not in eval_metadata["evals"]:
210
+ metric_config = res.get("metric_config", {})
211
+ eval_metadata["evals"][eval_name] = {
212
+ "description": metric_config.get("evaluation_description", "No description available"),
213
+ "score_type": metric_config.get("score_type", "unknown"),
214
+ "lower_is_better": metric_config.get("lower_is_better", False),
215
+ "min_score": metric_config.get("min_score"),
216
+ "max_score": metric_config.get("max_score"),
217
+ "level_names": metric_config.get("level_names", []),
218
+ "level_metadata": metric_config.get("level_metadata", []),
219
+ "has_unknown_level": metric_config.get("has_unknown_level", False)
220
+ }
221
+ break
222
+
223
+ return eval_metadata
224
+
225
+
226
+ def build_leaderboard_table(selected_leaderboard, search_query="", progress_callback=None):
227
+ """Builds the leaderboard DataFrame from cache or files."""
228
+ if not selected_leaderboard:
229
+ return pd.DataFrame()
230
+
231
+ if selected_leaderboard in LEADERBOARD_CACHE:
232
+ df, _ = LEADERBOARD_CACHE[selected_leaderboard]
233
+ else:
234
+ rows = []
235
+
236
+ if selected_leaderboard in HF_DATASET_CACHE:
237
+ if progress_callback:
238
+ progress_callback(0, desc=f"Loading {selected_leaderboard} from cache...")
239
+
240
+ parsed_items = HF_DATASET_CACHE[selected_leaderboard]
241
+
242
+ for i, parsed in enumerate(parsed_items):
243
+ if i % 100 == 0 and progress_callback:
244
+ progress_callback((i / len(parsed_items)), desc=f"Processing {selected_leaderboard}...")
245
+
246
+ row = {
247
+ "Model": parsed["model"],
248
+ "Developer": parsed["developer"],
249
+ "Params (B)": parsed["params"],
250
+ "Arch": parsed["architecture"],
251
+ "Precision": parsed["precision"]
252
+ }
253
+ row.update(parsed["results"])
254
+ rows.append(row)
255
+ else:
256
+ # Fall back to file system
257
+ if progress_callback:
258
+ progress_callback(0, desc=f"Scanning {selected_leaderboard}...")
259
+
260
+ all_files = list(walk_eval_files(selected_leaderboard))
261
+ total_files = len(all_files)
262
+
263
+ for i, json_file in enumerate(all_files):
264
+ if i % 100 == 0 and progress_callback:
265
+ progress_callback((i / total_files), desc=f"Loading {selected_leaderboard}...")
266
+
267
+ parsed = parse_eval_json(json_file)
268
+ if parsed:
269
+ row = {
270
+ "Model": parsed["model"],
271
+ "Developer": parsed["developer"],
272
+ "Params (B)": parsed["params"],
273
+ "Arch": parsed["architecture"],
274
+ "Precision": parsed["precision"]
275
+ }
276
+ row.update(parsed["results"])
277
+ rows.append(row)
278
+
279
+ if not rows:
280
+ df = pd.DataFrame(columns=["Model", "Developer", "Params (B)", "Arch", "Precision"])
281
+ LEADERBOARD_CACHE[selected_leaderboard] = (df, None)
282
+ return df
283
+
284
+ df = pd.DataFrame(rows)
285
+ df = df.dropna(axis=1, how='all')
286
+
287
+ if df.empty:
288
+ LEADERBOARD_CACHE[selected_leaderboard] = (df, None)
289
+ return df
290
+
291
+ numeric_cols = df.select_dtypes(include=['float', 'int']).columns
292
+ df[numeric_cols] = df[numeric_cols].round(3)
293
+
294
+ # Add Average Score
295
+ eval_only_cols = [c for c in numeric_cols if c not in ["Params (B)"]]
296
+ if len(eval_only_cols) > 0:
297
+ df["Average"] = df[eval_only_cols].mean(axis=1).round(3)
298
+
299
+ base_cols = ["Model", "Developer", "Params (B)", "Arch", "Precision", "Average"]
300
+ eval_cols = [c for c in df.columns if c not in base_cols]
301
+ base_cols = [c for c in base_cols if c in df.columns]
302
+
303
+ final_cols = base_cols + sorted(eval_cols)
304
+ df = df[final_cols]
305
+
306
+ if "Average" in df.columns:
307
+ df = df.sort_values("Average", ascending=False)
308
+
309
+ LEADERBOARD_CACHE[selected_leaderboard] = (df, None)
310
+
311
+ return df
312
+
313
+
314
+ def clear_cache():
315
+ """Clears all caches."""
316
+ LEADERBOARD_CACHE.clear()
317
+
eval.schema.json ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "version": "0.0.1",
4
+ "type": "object",
5
+ "description": "Schema for storing and validating LLMs evaluation data, including model configuration, prompts, instances, Output, and evaluation metrics",
6
+ "required": [
7
+ "schema_version",
8
+ "evaluation_id",
9
+ "evaluation_source",
10
+ "retrieved_timestamp",
11
+ "source_data",
12
+ "source_metadata",
13
+ "model_info",
14
+ "evaluation_results"
15
+ ],
16
+ "properties": {
17
+ "schema_version": {
18
+ "type": "string",
19
+ "description": "Version of the schema used for this evaluation data"
20
+ },
21
+ "evaluation_id": {
22
+ "type": "string",
23
+ "description": "Unique identifier for this specific evaluation run. Use org_name/eval_name/retrieved_timestamp format"
24
+ },
25
+ "retrieved_timestamp": {
26
+ "type": "string",
27
+ "description": "Timestamp for when this record was created"
28
+ },
29
+ "source_data": {
30
+ "type": "array",
31
+ "description": "URLs for the source of the evaluation data",
32
+ "items": {
33
+ "type": "string"
34
+ }
35
+ },
36
+ "evaluation_source": {
37
+ "type": "object",
38
+ "description": "Details about evaluation origin. There are options that evaluations come from leaderboards (e.g. Live Code Bench Pro) or evaluation platforms (e.g. lm-eval, inspect ai, HELM...).",
39
+ "required": [
40
+ "evaluation_source_name",
41
+ "evaluation_source_type"
42
+ ],
43
+ "properties": {
44
+ "evaluation_source_name": {
45
+ "type": "string",
46
+ "description": "Name of the source (e.g. title of the source leaderboard or name of the platform used for the evaluation."
47
+ },
48
+ "evaluation_source_type": {
49
+ "type": "string",
50
+ "enum": [
51
+ "leaderboard",
52
+ "evaluation_platform"
53
+ ],
54
+ "description": "Type of evaluation source, e.g., leaderboard or evaluation platform"
55
+ }
56
+ }
57
+ },
58
+ "source_metadata": {
59
+ "type": "object",
60
+ "description": "Metadata about the source of the leaderboard data",
61
+ "required": [
62
+ "source_organization_name",
63
+ "evaluator_relationship"
64
+ ],
65
+ "properties": {
66
+ "source_organization_name": {
67
+ "type": "string",
68
+ "description": "Name of the organization that provides the data"
69
+ },
70
+ "source_organization_url": {
71
+ "type": "string",
72
+ "description": "URL for the organization that provides the data"
73
+ },
74
+ "source_organization_logo_url": {
75
+ "type": "string",
76
+ "description": "URL for the Logo for the organization that provides the data"
77
+ },
78
+ "evaluator_relationship": {
79
+ "type": "string",
80
+ "description": "Relationship between the evaluator and the model",
81
+ "enum": [
82
+ "first_party",
83
+ "third_party",
84
+ "collaborative",
85
+ "other"
86
+ ]
87
+ }
88
+ }
89
+ },
90
+ "model_info": {
91
+ "type": "object",
92
+ "description": "Complete model specification including basic information, technical configuration and inference settings",
93
+ "required": [
94
+ "name",
95
+ "id"
96
+ ],
97
+ "properties": {
98
+ "name": {
99
+ "type": "string",
100
+ "description": "Model name provided by evaluation source"
101
+ },
102
+ "id": {
103
+ "type": "string",
104
+ "description": "Model name standarized to HuggingFace format (e.g. meta-llama/Llama-3.1-8B-Instruct)"
105
+ },
106
+ "developer": {
107
+ "type": "string",
108
+ "description": "Name of organization that provides the model (e.g. 'OpenAI')"
109
+ },
110
+ "inference_platform": {
111
+ "type": "string",
112
+ "description": "Description of platform used to run the evaluations (e.g. local machine, Bedrock)"
113
+ }
114
+ }
115
+ },
116
+ "evaluation_results": {
117
+ "type": "array",
118
+ "description": "Array of evaluation results",
119
+ "items": {
120
+ "type": "object",
121
+ "required": [
122
+ "evaluation_name",
123
+ "metric_config",
124
+ "score_details"
125
+ ],
126
+ "properties": {
127
+ "evaluation_name": {
128
+ "type": "string",
129
+ "description": "Name of the evaluation"
130
+ },
131
+ "evaluation_timestamp": {
132
+ "type": "string",
133
+ "description": "Timestamp for when the evaluations were run"
134
+ },
135
+ "metric_config": {
136
+ "type": "object",
137
+ "description": "Details about the metric",
138
+ "required": [
139
+ "lower_is_better"
140
+ ],
141
+ "properties": {
142
+ "evaluation_description": {
143
+ "type": "string",
144
+ "description": "Description of the evaluation"
145
+ },
146
+ "lower_is_better": {
147
+ "type": "boolean",
148
+ "description": "Whether a lower score is better"
149
+ },
150
+ "score_type": {
151
+ "type": "string",
152
+ "description": "Type of score",
153
+ "enum": [
154
+ "binary",
155
+ "continuous",
156
+ "levels"
157
+ ]
158
+ },
159
+ "level_names": {
160
+ "type": "array",
161
+ "description": "Names of the score levels",
162
+ "items": {
163
+ "type": "string"
164
+ }
165
+ },
166
+ "level_metadata": {
167
+ "type": "array",
168
+ "description": "Additional Description for each Score Level",
169
+ "items": {
170
+ "type": "string"
171
+ }
172
+ },
173
+ "has_unknown_level": {
174
+ "type": "boolean",
175
+ "description": "Indicates whether there is an Unknown Level - if True, then a score of -1 will be treated as Unknown"
176
+ },
177
+ "min_score": {
178
+ "type": "number",
179
+ "description": "Minimum possible score for continuous metric"
180
+ },
181
+ "max_score": {
182
+ "type": "number",
183
+ "description": "Maximum possible score for continuous metric"
184
+ }
185
+ },
186
+ "if": {
187
+ "properties": {
188
+ "score_type": {
189
+ "const": "levels"
190
+ }
191
+ }
192
+ },
193
+ "then": {
194
+ "required": [
195
+ "level_names",
196
+ "has_unknown_level"
197
+ ]
198
+ },
199
+ "else": {
200
+ "if": {
201
+ "properties": {
202
+ "score_type": {
203
+ "const": "continuous"
204
+ }
205
+ }
206
+ },
207
+ "then": {
208
+ "required": [
209
+ "min_score",
210
+ "max_score"
211
+ ]
212
+ }
213
+ }
214
+ },
215
+ "score_details": {
216
+ "type": "object",
217
+ "description": "The score for the evaluation and related details",
218
+ "required": [
219
+ "score"
220
+ ],
221
+ "properties": {
222
+ "score": {
223
+ "type": "number",
224
+ "description": "The score for the evaluation"
225
+ },
226
+ "details": {
227
+ "type": "object",
228
+ "description": "Any additional details about the score",
229
+ "additionalProperties": true
230
+ }
231
+ }
232
+ },
233
+ "detailed_evaluation_results_url": {
234
+ "type": "string",
235
+ "description": "Link to detailed evaluation data"
236
+ },
237
+ "generation_config": {
238
+ "type": "object",
239
+ "generation_args": {
240
+ "type": "object",
241
+ "description": "Parameters used to generate results - properties may vary by model type",
242
+ "properties": {
243
+ "temperature": {
244
+ "type": [
245
+ "null",
246
+ "number"
247
+ ],
248
+ "description": "Sampling temperature"
249
+ },
250
+ "top_p": {
251
+ "type": [
252
+ "null",
253
+ "number"
254
+ ],
255
+ "description": "Nucleus sampling parameter"
256
+ },
257
+ "top_k": {
258
+ "type": [
259
+ "null",
260
+ "number"
261
+ ],
262
+ "description": "Top-k sampling parameter"
263
+ },
264
+ "max_tokens": {
265
+ "type": "integer",
266
+ "minimum": 1,
267
+ "description": "Maximum number of tokens to generate"
268
+ }
269
+ },
270
+ "additionalProperties": true
271
+ },
272
+ "additional_details": {
273
+ "type": "string",
274
+ "description": "Additional details about how the results for this metric were generated."
275
+ }
276
+ }
277
+ }
278
+ }
279
+
280
+ }
281
+ }
282
+ }
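
The if/then/else block above is the only non-obvious part of the schema, so here is a minimal sketch (not part of the commit) that exercises it with the jsonschema library. The two metric_config fragments are hypothetical, and the lookup path assumes evaluation_results is declared under the schema's top-level "properties" block, which falls outside this hunk.

import json
from pathlib import Path

from jsonschema import Draft7Validator

# Assumption: eval.schema.json (added above) sits in the working directory.
schema = json.loads(Path("eval.schema.json").read_text())
metric_config_schema = (
    schema["properties"]["evaluation_results"]["items"]["properties"]["metric_config"]
)
validator = Draft7Validator(metric_config_schema)

# A "continuous" metric must also carry min_score and max_score ...
continuous_cfg = {
    "evaluation_description": "Accuracy on a hypothetical benchmark",
    "lower_is_better": False,
    "score_type": "continuous",
    "min_score": 0,
    "max_score": 1,
}

# ... while a "levels" metric must name its levels and say whether -1 means Unknown.
levels_cfg = {
    "lower_is_better": False,
    "score_type": "levels",
    "level_names": ["fail", "partial", "pass"],
    "has_unknown_level": True,
}

for cfg in (continuous_cfg, levels_cfg):
    errors = list(validator.iter_errors(cfg))
    print("valid" if not errors else errors[0].message)
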
hf_operations.py ADDED
@@ -0,0 +1,202 @@
1
+ """
2
+ HuggingFace Operations: Upload data, create PRs, validate schemas.
3
+ """
4
+ from huggingface_hub import HfApi, login
5
+ import pandas as pd
6
+ import json
7
+ from pathlib import Path
8
+ from jsonschema import validate, ValidationError, Draft7Validator
9
+
10
+
11
+ # Load schema once at module level
12
+ SCHEMA_PATH = Path(__file__).parent / "eval.schema.json"
13
+ with open(SCHEMA_PATH, 'r') as f:
14
+ EVAL_SCHEMA = json.load(f)
15
+
16
+
17
+ def validate_json_against_schema(json_data):
18
+ """
19
+ Validate a JSON object against eval.schema.json.
20
+
21
+ Args:
22
+ json_data: Dict containing the evaluation data
23
+
24
+ Returns:
25
+ (bool, str): (is_valid, error_message)
26
+ """
27
+ try:
28
+ validate(instance=json_data, schema=EVAL_SCHEMA)
29
+ return True, "Schema validation passed"
30
+ except ValidationError as e:
31
+ # Extract the most relevant error message
32
+ error_path = " → ".join(str(p) for p in e.path) if e.path else "root"
33
+ return False, f"❌ Schema validation failed at '{error_path}': {e.message}"
34
+ except Exception as e:
35
+ return False, f"❌ Validation error: {str(e)}"
36
+
37
+
38
+ def upload_to_hf_dataset(parquet_file, split_name, repo_id="deepmage121/eee_test"):
39
+ """
40
+ Upload a parquet file as a new split to the HF dataset.
41
+
42
+ Args:
43
+ parquet_file: Path to parquet file
44
+ split_name: Name of the split (leaderboard name)
45
+ repo_id: HuggingFace dataset repository ID
46
+ """
47
+ # TODO: Implement upload logic
48
+ pass
49
+
50
+
51
+ def check_hf_authentication():
52
+ """
53
+ Check if user is authenticated with HuggingFace.
54
+
55
+ Returns:
56
+ (bool, str): (is_authenticated, username or error_message)
57
+ """
58
+ try:
59
+ api = HfApi()
60
+ user_info = api.whoami()
61
+ return True, user_info['name']
62
+ except Exception as e:
63
+ return False, "Not authenticated. Run: huggingface-cli login"
64
+
65
+
66
+ def check_duplicate_pr_exists(leaderboard_name, repo_id="deepmage121/eee_test"):
67
+ """
68
+ Check if a PR already exists for this leaderboard.
69
+
70
+ Args:
71
+ leaderboard_name: Name of the leaderboard
72
+ repo_id: HuggingFace dataset repository ID
73
+
74
+ Returns:
75
+ (bool, str or None): (exists, pr_url if exists)
76
+ """
77
+ try:
78
+ api = HfApi()
79
+ discussions = api.get_repo_discussions(repo_id=repo_id, repo_type="dataset")
80
+
81
+ # Check for open PRs with matching title
82
+ pr_title_pattern = f"add new leaderboard: {leaderboard_name.lower()}"
83
+ for discussion in discussions:
84
+ if discussion.is_pull_request and discussion.status == "open":
85
+ if pr_title_pattern in discussion.title.lower():
86
+ pr_url = f"https://huggingface.co/datasets/{repo_id}/discussions/{discussion.num}"
87
+ return True, pr_url
88
+
89
+ return False, None
90
+ except Exception as e:
91
+ # If we can't check, assume no duplicate (fail open)
92
+ print(f"Warning: Could not check for duplicate PRs: {e}")
93
+ return False, None
94
+
95
+
96
+ def create_pr_for_new_leaderboard(leaderboard_name, parquet_file, repo_id="deepmage121/eee_test"):
97
+ """
98
+ Create a pull request to add a new leaderboard split.
99
+
100
+ Args:
101
+ leaderboard_name: Name of the new leaderboard
102
+ parquet_file: Path to parquet file
103
+ repo_id: HuggingFace dataset repository ID
104
+
105
+ Returns:
106
+ (success, pr_url or error_message)
107
+ """
108
+ # 1. Check authentication
109
+ is_auth, auth_result = check_hf_authentication()
110
+ if not is_auth:
111
+ return False, f"❌ {auth_result}"
112
+
113
+ # 2. Check for duplicate PR
114
+ has_duplicate, duplicate_url = check_duplicate_pr_exists(leaderboard_name, repo_id)
115
+ if has_duplicate:
116
+ return False, f"⚠️ PR already exists: {duplicate_url}"
117
+
118
+ # 3. Validate parquet file exists and has data
119
+ parquet_path = Path(parquet_file)
120
+ if not parquet_path.exists():
121
+ return False, "❌ Parquet file not found"
122
+
123
+ df = pd.read_parquet(parquet_file)
124
+ if len(df) == 0:
125
+ return False, "❌ Parquet file is empty"
126
+
127
+ # 4. Create PR
128
+ try:
129
+ api = HfApi()
130
+
131
+ # Upload the parquet file to the branch
132
+ commit_message = f"Add new leaderboard: {leaderboard_name}"
133
+
134
+ # Upload file and create PR
135
+ commit_info = api.upload_file(
136
+ path_or_fileobj=parquet_file,
137
+ path_in_repo=f"data/{leaderboard_name}.parquet",
138
+ repo_id=repo_id,
139
+ repo_type="dataset",
140
+ commit_message=commit_message,
141
+ create_pr=True,
142
+ )
143
+
144
+ # Extract PR URL from commit info
145
+ pr_url = commit_info.pr_url if hasattr(commit_info, 'pr_url') else f"https://huggingface.co/datasets/{repo_id}/discussions"
146
+
147
+ return True, f"PR created ({len(df)} rows): {pr_url}"
148
+
149
+ except Exception as e:
150
+ return False, f"❌ Failed to create PR: {str(e)}"
151
+
152
+
153
+ def validate_schema(parquet_file):
154
+ """
155
+ Validate that a parquet file matches the expected schema.
156
+
157
+ Args:
158
+ parquet_file: Path to parquet file to validate
159
+
160
+ Returns:
161
+ (bool, str): (is_valid, error_message)
162
+ """
163
+ try:
164
+ df = pd.read_parquet(parquet_file)
165
+
166
+ # Required columns
167
+ required_cols = [
168
+ '_leaderboard', '_developer', '_model', '_uuid',
169
+ 'schema_version', 'evaluation_id', 'retrieved_timestamp',
170
+ 'source_data', 'evaluation_source_name', 'evaluation_source_type',
171
+ 'source_organization_name', 'evaluator_relationship',
172
+ 'model_name', 'model_id', 'model_developer',
173
+ 'evaluation_results'
174
+ ]
175
+
176
+ missing = [col for col in required_cols if col not in df.columns]
177
+ if missing:
178
+ return False, f"Missing required columns: {', '.join(missing)}"
179
+
180
+ # Check data types (all should be strings)
181
+ for col in df.columns:
182
+ if df[col].dtype not in ['object', 'string']:
183
+ return False, f"Column '{col}' has wrong type: {df[col].dtype} (expected string)"
184
+
185
+ return True, "Schema validation passed"
186
+
187
+ except Exception as e:
188
+ return False, f"Validation error: {str(e)}"
189
+
190
+
191
+ def export_to_json(parquet_file, output_dir):
192
+ """
193
+ Export parquet data back to JSON files.
194
+ Uses the parquet_to_folder function from json_to_parquet.py
195
+
196
+ Args:
197
+ parquet_file: Path to parquet file
198
+ output_dir: Directory to write JSON files to
199
+ """
200
+ from json_to_parquet import parquet_to_folder
201
+ parquet_to_folder(parquet_file, output_dir)
202
+
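
Taken together, the helpers above are meant to be chained: schema-check the raw JSON, sanity-check the packed parquet, then open a PR against the dataset repo. A minimal sketch of that flow follows; the leaderboard name and file paths are hypothetical, and the repo defaults to deepmage121/eee_test as in the function signatures above.

import json

from hf_operations import (
    validate_json_against_schema,
    validate_schema,
    create_pr_for_new_leaderboard,
)

# Hypothetical input: one evaluation record in the leaderboard_data layout.
with open("leaderboard_data/MyBoard/acme/acme_model/1234.json") as f:
    ok, msg = validate_json_against_schema(json.load(f))
print(msg)

# Hypothetical parquet built beforehand (see json_to_parquet.py below).
if ok:
    ok, msg = validate_schema("MyBoard.parquet")
if ok:
    ok, msg = create_pr_for_new_leaderboard("MyBoard", "MyBoard.parquet")
print(msg)
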
json_to_parquet.py ADDED
@@ -0,0 +1,228 @@
1
+
2
+
3
+ import json
4
+ from pathlib import Path
5
+ import pandas as pd
6
+
7
+
8
+ def json_to_row(json_path: Path) -> dict:
9
+ """Convert one JSON to a single row (1 JSON = 1 row, evaluations as columns)."""
10
+ with open(json_path, 'r') as f:
11
+ data = json.load(f)
12
+
13
+ required_fields = ["schema_version", "evaluation_id", "evaluation_source", "retrieved_timestamp",
14
+ "source_data", "source_metadata", "model_info", "evaluation_results"]
15
+ for field in required_fields:
16
+ if field not in data:
17
+ raise ValueError(f"{json_path}: Missing required field '{field}'")
18
+
19
+ if "evaluation_source_name" not in data["evaluation_source"]:
20
+ raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_name'")
21
+ if "evaluation_source_type" not in data["evaluation_source"]:
22
+ raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_type'")
23
+
24
+ if "source_organization_name" not in data["source_metadata"]:
25
+ raise ValueError(f"{json_path}: Missing required field 'source_metadata.source_organization_name'")
26
+ if "evaluator_relationship" not in data["source_metadata"]:
27
+ raise ValueError(f"{json_path}: Missing required field 'source_metadata.evaluator_relationship'")
28
+
29
+ if "name" not in data["model_info"]:
30
+ raise ValueError(f"{json_path}: Missing required field 'model_info.name'")
31
+ if "id" not in data["model_info"]:
32
+ raise ValueError(f"{json_path}: Missing required field 'model_info.id'")
33
+ if "developer" not in data["model_info"]:
34
+ raise ValueError(f"{json_path}: Missing required field 'model_info.developer'")
35
+
36
+ leaderboard = data["evaluation_source"]["evaluation_source_name"]
37
+ model = data["model_info"]["id"]
38
+ uuid = json_path.stem
39
+ developer = data["model_info"]["developer"]
40
+
41
+ # Validate evaluation results
42
+ for eval_result in data["evaluation_results"]:
43
+ if "evaluation_name" not in eval_result:
44
+ raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].evaluation_name'")
45
+ if "metric_config" not in eval_result:
46
+ raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config'")
47
+ if "score_details" not in eval_result:
48
+ raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details'")
49
+
50
+ if "lower_is_better" not in eval_result["metric_config"]:
51
+ raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config.lower_is_better'")
52
+ if "score" not in eval_result["score_details"]:
53
+ raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details.score'")
54
+
55
+ row = {
56
+ # Folder structure (for reconstruction)
57
+ "_leaderboard": leaderboard,
58
+ "_developer": developer,
59
+ "_model": model,
60
+ "_uuid": uuid,
61
+
62
+ # Required top-level fields
63
+ "schema_version": data["schema_version"],
64
+ "evaluation_id": data["evaluation_id"],
65
+ "retrieved_timestamp": data["retrieved_timestamp"],
66
+ "source_data": json.dumps(data["source_data"]),
67
+
68
+ # Required nested fields
69
+ "evaluation_source_name": data["evaluation_source"]["evaluation_source_name"],
70
+ "evaluation_source_type": data["evaluation_source"]["evaluation_source_type"],
71
+
72
+ "source_organization_name": data["source_metadata"]["source_organization_name"],
73
+ "source_organization_url": data["source_metadata"].get("source_organization_url"),
74
+ "source_organization_logo_url": data["source_metadata"].get("source_organization_logo_url"),
75
+ "evaluator_relationship": data["source_metadata"]["evaluator_relationship"],
76
+
77
+ "model_name": data["model_info"]["name"],
78
+ "model_id": data["model_info"]["id"],
79
+ "model_developer": data["model_info"]["developer"],
80
+ "model_inference_platform": data["model_info"].get("inference_platform"),
81
+
82
+ # Store full evaluation_results and additional_details as JSON
83
+ "evaluation_results": json.dumps(data["evaluation_results"]),
84
+ "additional_details": json.dumps(data["additional_details"]) if "additional_details" in data else None,
85
+ }
86
+
87
+ return row
88
+
89
+
90
+ def add_to_parquet(json_or_folder: str, parquet_file: str):
91
+ """
92
+ Add JSON(s) to Parquet file.
93
+ Creates new file if it doesn't exist, appends and deduplicates if it does.
94
+
95
+ Args:
96
+ json_or_folder: Path to single JSON file or folder containing JSONs
97
+ parquet_file: Output Parquet file path
98
+ """
99
+ input_path = Path(json_or_folder)
100
+
101
+ if input_path.is_file():
102
+ json_files = [input_path]
103
+ elif input_path.is_dir():
104
+ json_files = list(input_path.rglob("*.json"))
105
+ if not json_files:
106
+ raise ValueError(f"No JSON files found in directory: {json_or_folder}")
107
+ else:
108
+ raise ValueError(f"Invalid input: {json_or_folder}")
109
+
110
+ print(f"Processing {len(json_files)} JSON file(s)...")
111
+
112
+ parquet_path = Path(parquet_file)
113
+ if parquet_path.exists():
114
+ existing_df = pd.read_parquet(parquet_file)
115
+ existing_keys = set(
116
+ existing_df[["_leaderboard", "_developer", "_model", "_uuid"]]
117
+ .apply(tuple, axis=1)
118
+ )
119
+ print(f"Found {len(existing_df)} existing rows")
120
+ else:
121
+ existing_df = None
122
+ existing_keys = set()
123
+
124
+ all_rows = []
125
+ skipped = 0
126
+ for i, jf in enumerate(json_files, 1):
127
+ if i % 100 == 0:
128
+ print(f" {i}/{len(json_files)}")
129
+
130
+ row = json_to_row(jf)
131
+ key = (row["_leaderboard"], row["_developer"], row["_model"], row["_uuid"])
132
+ if key not in existing_keys:
133
+ all_rows.append(row)
134
+ existing_keys.add(key)
135
+ else:
136
+ skipped += 1
137
+
138
+ if skipped > 0:
139
+ print(f" Skipped {skipped} duplicate file(s)")
140
+
141
+ # Handle case where no new rows to add
142
+ if not all_rows:
143
+ if existing_df is not None:
144
+ print(f"No new files to add, keeping existing {len(existing_df)} file(s)")
145
+ return
146
+ else:
147
+ raise ValueError("No valid JSON files to process and no existing parquet file")
148
+
149
+ new_df = pd.DataFrame(all_rows)
150
+
151
+ if existing_df is not None:
152
+ df = pd.concat([existing_df, new_df], ignore_index=True)
153
+ print(f"Added {len(new_df)} new file(s) to existing {len(existing_df)} file(s)")
154
+ else:
155
+ df = new_df
156
+
157
+ df.to_parquet(parquet_file, index=False)
158
+ print(f"Saved {len(df)} total file(s) to {parquet_file} ({parquet_path.stat().st_size / 1024 / 1024:.1f} MB)")
159
+
160
+
161
+ def parquet_to_folder(parquet_file: str, output_dir: str):
162
+ """Reconstruct folder structure from Parquet."""
163
+ df = pd.read_parquet(parquet_file)
164
+ out = Path(output_dir)
165
+
166
+ for _, row in df.iterrows():
167
+ lb = row["_leaderboard"]
168
+ dev = row["_developer"]
169
+ model = row["_model"]
170
+ uuid = row["_uuid"]
171
+
172
+ json_data = {
173
+ "schema_version": row["schema_version"],
174
+ "evaluation_id": row["evaluation_id"],
175
+ "retrieved_timestamp": row["retrieved_timestamp"],
176
+ "source_data": json.loads(row["source_data"]),
177
+ "evaluation_source": {
178
+ "evaluation_source_name": row["evaluation_source_name"],
179
+ "evaluation_source_type": row["evaluation_source_type"]
180
+ },
181
+ "source_metadata": {
182
+ "source_organization_name": row["source_organization_name"],
183
+ "evaluator_relationship": row["evaluator_relationship"]
184
+ },
185
+ "model_info": {
186
+ "name": row["model_name"],
187
+ "id": row["model_id"],
188
+ "developer": row["model_developer"]
189
+ },
190
+ "evaluation_results": json.loads(row["evaluation_results"])
191
+ }
192
+
193
+ if pd.notna(row["source_organization_url"]):
194
+ json_data["source_metadata"]["source_organization_url"] = row["source_organization_url"]
195
+ if pd.notna(row["source_organization_logo_url"]):
196
+ json_data["source_metadata"]["source_organization_logo_url"] = row["source_organization_logo_url"]
197
+
198
+ if pd.notna(row["model_inference_platform"]):
199
+ json_data["model_info"]["inference_platform"] = row["model_inference_platform"]
200
+
201
+ if pd.notna(row["additional_details"]):
202
+ json_data["additional_details"] = json.loads(row["additional_details"])
203
+
204
+ file_path = out / lb / dev / model / f"{uuid}.json"
205
+ file_path.parent.mkdir(parents=True, exist_ok=True)
206
+ with open(file_path, 'w') as f:
207
+ json.dump(json_data, f, indent=2)
208
+
209
+ print(f"Reconstructed {len(df)} files to {output_dir}")
210
+
211
+
212
+ if __name__ == "__main__":
213
+ import sys
214
+
215
+ if len(sys.argv) < 2:
216
+ print("Usage:")
217
+ print(" python json_to_parquet.py add <json_or_folder> <output.parquet>")
218
+ print(" python json_to_parquet.py export <input.parquet> <output_dir>")
219
+ sys.exit(1)
220
+
221
+ cmd = sys.argv[1]
222
+
223
+ if cmd == "add":
224
+ add_to_parquet(sys.argv[2], sys.argv[3])
225
+ elif cmd == "export":
226
+ parquet_to_folder(sys.argv[2], sys.argv[3])
227
+ else:
228
+ print(f"Unknown command: {cmd}")
leaderboard_data/HFOpenLLMv2/0-hero/0-hero_Matter-0.2-7B-DPO/40e80d5e-db72-46b7-bd14-b7d005df4be8.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/0-hero_Matter-0.2-7B-DPO/1762652579.4626381",
4
- "retrieved_timestamp": "1762652579.462642",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "0-hero/Matter-0.2-7B-DPO",
18
- "developer": "0-hero",
19
- "inference_platform": "unknown",
20
- "id": "0-hero/Matter-0.2-7B-DPO"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.3302792147058693
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.3596254301656297
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.014350453172205438
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.25922818791946306
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.381375
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.1163563829787234
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "MistralForCausalLM",
105
- "params_billions": 7.242
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-32K/0d91a153-1b6b-4891-8722-a5c7e372ba64.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-32K/1762652579.463656",
4
- "retrieved_timestamp": "1762652579.463657",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-1.5-34B-32K",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-1.5-34B-32K"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.3118691737922047
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.6015685776542417
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.1540785498489426
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.36325503355704697
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.4398229166666667
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.4709109042553192
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 34.389
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat-16K/2192007d-1f6e-4f74-b518-7448ef3a896e.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-Chat-16K/1762652579.464125",
4
- "retrieved_timestamp": "1762652579.4641259",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-1.5-34B-Chat-16K",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-1.5-34B-Chat-16K"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.456449997118756
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.6100218256499571
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.21374622356495468
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.33808724832214765
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.43976041666666665
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.45445478723404253
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 34.389
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat/e335874b-9b3e-4966-a7e0-22e9d16f8324.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-Chat/1762652579.463886",
4
- "retrieved_timestamp": "1762652579.4638872",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-1.5-34B-Chat",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-1.5-34B-Chat"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.6066758423205982
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.6083748310271819
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.277190332326284
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.3649328859060403
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.4281979166666667
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.45204454787234044
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 34.389
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B/8409c158-ef12-4e6c-8a1d-7be2084b3446.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B/1762652579.4633532",
4
- "retrieved_timestamp": "1762652579.463354",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-1.5-34B",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-1.5-34B"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.2841172533322695
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.5976391706360018
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.15332326283987915
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.36577181208053694
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.4236041666666667
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.4665890957446808
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 34.389
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B-Chat/3452e57f-3023-4e2e-ad84-b09e409fe334.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-6B-Chat/1762652579.464571",
4
- "retrieved_timestamp": "1762652579.464572",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-1.5-6B-Chat",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-1.5-6B-Chat"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.5145270105542183
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.4571311331954389
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.1623867069486405
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.30201342281879195
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.43917708333333333
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.3193151595744681
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 6.061
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B/1a1f1263-96b6-4e32-a2c8-6c0d6b47dff9.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-6B/1762652579.464354",
4
- "retrieved_timestamp": "1762652579.464355",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-1.5-6B",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-1.5-6B"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.26166017278598563
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.44925820198929056
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.06646525679758308
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.313758389261745
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.43740625
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.31441156914893614
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 6.061
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-32K/df9d9d44-daa1-4e61-9b46-192380043889.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-32K/1762652579.4649951",
4
- "retrieved_timestamp": "1762652579.464996",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-1.5-9B-32K",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-1.5-9B-32K"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.23031113002389217
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.496332115988265
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.10800604229607251
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.35906040268456374
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.4186145833333333
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.37649601063829785
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 8.829
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat-16K/090c9691-4b7e-4a98-b9a2-644e21797be4.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-Chat-16K/1762652579.465471",
4
- "retrieved_timestamp": "1762652579.465471",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-1.5-9B-Chat-16K",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-1.5-9B-Chat-16K"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.4214040966856829
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.5153383364651778
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.1782477341389728
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.3087248322147651
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.40990624999999997
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.39935172872340424
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 8.829
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat/9256c32b-d956-418f-97da-ea78e3ad9e48.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-Chat/1762652579.465226",
4
- "retrieved_timestamp": "1762652579.465226",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-1.5-9B-Chat",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-1.5-9B-Chat"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.6045525871354672
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.555906430281685
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.2258308157099698
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.3347315436241611
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.42590625
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.39752327127659576
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 8.829
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B/904d1f91-3153-49d5-afd3-9921bfc086f1.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B/1762652579.464781",
4
- "retrieved_timestamp": "1762652579.464782",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-1.5-9B",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-1.5-9B"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.29358435617494916
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.514294179104191
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.11404833836858005
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.37919463087248323
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.43278124999999995
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.3916223404255319
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 8.829
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-200K/fb2ebd9a-f5b8-42a2-9b58-e6f0e7d9b98a.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B-200K/1762652579.465893",
4
- "retrieved_timestamp": "1762652579.465894",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-34B-200K",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-34B-200K"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.15424850507763843
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.5441817925289527
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.05740181268882175
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.3565436241610738
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.38171874999999994
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.45345744680851063
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 34.389
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-Chat/5d9b9217-874b-426d-8af4-5105a3b1b3ad.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B-Chat/1762652579.466115",
4
- "retrieved_timestamp": "1762652579.4661162",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-34B-Chat",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-34B-Chat"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.4698887839820565
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.5560872910766164
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.06268882175226587
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.33808724832214765
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.39784375
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.4093251329787234
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 34.389
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B/3ebcbf3d-cb2d-4332-bb8a-1db104033391.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B/1762652579.4656792",
4
- "retrieved_timestamp": "1762652579.46568",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-34B",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-34B"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.3045751938190667
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.5457099951794562
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.0513595166163142
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.36661073825503354
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.4118541666666667
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.441156914893617
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 34.389
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-200K/6b720e8b-aab8-4ba4-9bce-e7a1de3cfb86.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B-200K/1762652579.4665558",
4
- "retrieved_timestamp": "1762652579.466557",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-6B-200K",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-6B-200K"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.08433068702154728
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.42892948109603307
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.01812688821752266
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.28187919463087246
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.45873958333333337
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.2844082446808511
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 6.061
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-Chat/1120c801-7736-4d9d-b23d-08eeedb34186.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B-Chat/1762652579.466805",
4
- "retrieved_timestamp": "1762652579.466806",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-6B-Chat",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-6B-Chat"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.33952135888331847
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.41326019207548687
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.013595166163141994
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.29446308724832215
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.36879166666666663
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.3061003989361702
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 6.061
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B/297419fa-855c-4eae-ad7c-3cf4a0262450.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B/1762652579.4663382",
4
- "retrieved_timestamp": "1762652579.4663382",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-6B",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-6B"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.28933784580468713
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.4309230591000865
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.015861027190332326
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.26929530201342283
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.39368749999999997
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.29911901595744683
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 6.061
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B-200K/4299df04-495a-4687-b143-96b1b562d5e8.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-9B-200K/1762652579.467233",
4
- "retrieved_timestamp": "1762652579.467233",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-9B-200K",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-9B-200K"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.23270921155866434
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.4793302602023641
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.06646525679758308
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.31543624161073824
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.42940625
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.36220079787234044
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 8.829
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B/0ec59add-f9a9-4dbd-8a83-c6aec0b8ad21.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-9B/1762652579.46702",
4
- "retrieved_timestamp": "1762652579.4670231",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-9B",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-9B"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.2708779372066118
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.49396075125308075
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.055891238670694864
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.3179530201342282
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.40540624999999997
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.35738031914893614
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 8.829
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-Coder-9B-Chat/ef0cc3a5-0d62-4a45-b0c7-28a6f7dfdac4.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-Coder-9B-Chat/1762652579.4674509",
4
- "retrieved_timestamp": "1762652579.4674518",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "01-ai/Yi-Coder-9B-Chat",
18
- "developer": "01-ai",
19
- "inference_platform": "unknown",
20
- "id": "01-ai/Yi-Coder-9B-Chat"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.4817041006750976
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.48142000339111674
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.04003021148036254
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.24748322147651006
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.3991770833333333
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.24251994680851063
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 8.829
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/1-800-LLMs/1-800-LLMs_Qwen-2.5-14B-Hindi-Custom-Instruct/a48b0864-76b7-4860-a448-942a8d74f68e.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/1-800-LLMs_Qwen-2.5-14B-Hindi-Custom-Instruct/1762652579.468073",
4
- "retrieved_timestamp": "1762652579.468074",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct",
18
- "developer": "1-800-LLMs",
19
- "inference_platform": "unknown",
20
- "id": "1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.30774677854758703
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.6284322714967584
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.311178247734139
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.3699664429530201
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.4490625
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.516373005319149
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "Qwen2ForCausalLM",
105
- "params_billions": 14.77
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/152334H/152334H_miqu-1-70b-sf/f57d7b8d-85d5-4e0b-8dec-31e2931487dd.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/152334H_miqu-1-70b-sf/1762652579.469194",
4
- "retrieved_timestamp": "1762652579.469195",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "152334H/miqu-1-70b-sf",
18
- "developer": "152334H",
19
- "inference_platform": "unknown",
20
- "id": "152334H/miqu-1-70b-sf"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.5181740005407873
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.6102361685099691
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.12462235649546828
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.35067114093959734
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.45820833333333333
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.42278922872340424
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "float16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 68.977
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-7B-v0.1/1347cd1b-2ebc-4223-900f-7c2479e228a3.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/1TuanPham_T-VisStar-7B-v0.1/1762652579.469481",
4
- "retrieved_timestamp": "1762652579.469482",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "1TuanPham/T-VisStar-7B-v0.1",
18
- "developer": "1TuanPham",
19
- "inference_platform": "unknown",
20
- "id": "1TuanPham/T-VisStar-7B-v0.1"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.36070404305021786
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.5052203113352468
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.05740181268882175
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.28523489932885904
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.4375
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.3210605053191489
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "float16",
104
- "architecture": "MistralForCausalLM",
105
- "params_billions": 7.294
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-v0.1/b2926dd6-628c-4274-b0e8-1efc64269bb2.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/1TuanPham_T-VisStar-v0.1/1762652579.469921",
4
- "retrieved_timestamp": "1762652579.469923",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "1TuanPham/T-VisStar-v0.1",
18
- "developer": "1TuanPham",
19
- "inference_platform": "unknown",
20
- "id": "1TuanPham/T-VisStar-v0.1"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.36070404305021786
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.5052203113352468
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.05740181268882175
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.28523489932885904
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.4375
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.3210605053191489
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "float16",
104
- "architecture": "MistralForCausalLM",
105
- "params_billions": 7.294
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/3rd-Degree-Burn/3rd-Degree-Burn_L-3.1-Science-Writer-8B/0c4fd071-b5c9-4bf1-a1d5-d658be1a3258.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_L-3.1-Science-Writer-8B/1762652579.470164",
4
- "retrieved_timestamp": "1762652579.470165",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "3rd-Degree-Burn/L-3.1-Science-Writer-8B",
18
- "developer": "3rd-Degree-Burn",
19
- "inference_platform": "unknown",
20
- "id": "3rd-Degree-Burn/L-3.1-Science-Writer-8B"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.42625012743963797
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.5041306326216103
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.10347432024169184
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.27432885906040266
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.3959479166666666
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.36494348404255317
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "float16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 8.03
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/4season/4season_final_model_test_v2/74973e37-cd82-4e8a-816a-02b035fabff4.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/4season_final_model_test_v2/1762652579.4714398",
4
- "retrieved_timestamp": "1762652579.4714408",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "4season/final_model_test_v2",
18
- "developer": "4season",
19
- "inference_platform": "unknown",
20
- "id": "4season/final_model_test_v2"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.3191132860809319
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.6342049783295018
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.08383685800604229
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.3271812080536913
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.4314479166666667
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.3528091755319149
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 21.421
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-Instruct-preview/3766e8a0-99ad-4733-a01b-ced446b15eda.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/AALF_FuseChat-Llama-3.1-8B-Instruct-preview/1762652579.471838",
4
- "retrieved_timestamp": "1762652579.471839",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview",
18
- "developer": "AALF",
19
- "inference_platform": "unknown",
20
- "id": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.7189579205397235
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.5119887898349903
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.24773413897280966
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.3053691275167785
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.38200000000000006
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.3732546542553192
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 8.03
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-SFT-preview/342ac912-805f-4166-b8f4-10f0503fa892.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/AALF_FuseChat-Llama-3.1-8B-SFT-preview/1762652579.472149",
4
- "retrieved_timestamp": "1762652579.47215",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "AALF/FuseChat-Llama-3.1-8B-SFT-preview",
18
- "developer": "AALF",
19
- "inference_platform": "unknown",
20
- "id": "AALF/FuseChat-Llama-3.1-8B-SFT-preview"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.7280504616639405
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.5240303130445233
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.22507552870090636
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.30453020134228187
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.40199999999999997
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.37433510638297873
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 8.03
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/AGI-0/AGI-0_Art-v0-3B/162b6d5f-f983-4989-9603-f6baea26b633.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/AGI-0_Art-v0-3B/1762652579.473539",
4
- "retrieved_timestamp": "1762652579.47354",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "AGI-0/Art-v0-3B",
18
- "developer": "AGI-0",
19
- "inference_platform": "unknown",
20
- "id": "AGI-0/Art-v0-3B"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.319238509377341
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.3400959483013824
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.24622356495468278
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.25922818791946306
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.3768229166666666
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.11785239361702128
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "Qwen2ForCausalLM",
105
- "params_billions": 3.086
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/AI-MO_NuminaMath-7B-CoT/1762652579.474318",
4
- "retrieved_timestamp": "1762652579.4743192",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "AI-MO/NuminaMath-7B-CoT",
18
- "developer": "AI-MO",
19
- "inference_platform": "unknown",
20
- "id": "AI-MO/NuminaMath-7B-CoT"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.2688544173903022
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.4314193495860012
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.26963746223564955
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.26593959731543626
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.33034375
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.28681848404255317
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 6.91
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-TIR/0ffa78d4-fe45-4639-bcd1-eb19ab168a35.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "schema_version": "0.0.1",
3
- "evaluation_id": "hfopenllm_v2/AI-MO_NuminaMath-7B-TIR/1762652579.474566",
4
- "retrieved_timestamp": "1762652579.474567",
5
- "source_data": [
6
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
7
- ],
8
- "evaluation_source": {
9
- "evaluation_source_name": "HF Open LLM v2",
10
- "evaluation_source_type": "leaderboard"
11
- },
12
- "source_metadata": {
13
- "source_organization_name": "Hugging Face",
14
- "evaluator_relationship": "third_party"
15
- },
16
- "model_info": {
17
- "name": "AI-MO/NuminaMath-7B-TIR",
18
- "developer": "AI-MO",
19
- "inference_platform": "unknown",
20
- "id": "AI-MO/NuminaMath-7B-TIR"
21
- },
22
- "evaluation_results": [
23
- {
24
- "evaluation_name": "IFEval",
25
- "metric_config": {
26
- "evaluation_description": "Accuracy on IFEval",
27
- "lower_is_better": false,
28
- "score_type": "continuous",
29
- "min_score": 0,
30
- "max_score": 1
31
- },
32
- "score_details": {
33
- "score": 0.27562423259174545
34
- }
35
- },
36
- {
37
- "evaluation_name": "BBH",
38
- "metric_config": {
39
- "evaluation_description": "Accuracy on BBH",
40
- "lower_is_better": false,
41
- "score_type": "continuous",
42
- "min_score": 0,
43
- "max_score": 1
44
- },
45
- "score_details": {
46
- "score": 0.41436913375897894
47
- }
48
- },
49
- {
50
- "evaluation_name": "MATH Level 5",
51
- "metric_config": {
52
- "evaluation_description": "Exact Match on MATH Level 5",
53
- "lower_is_better": false,
54
- "score_type": "continuous",
55
- "min_score": 0,
56
- "max_score": 1
57
- },
58
- "score_details": {
59
- "score": 0.1608761329305136
60
- }
61
- },
62
- {
63
- "evaluation_name": "GPQA",
64
- "metric_config": {
65
- "evaluation_description": "Accuracy on GPQA",
66
- "lower_is_better": false,
67
- "score_type": "continuous",
68
- "min_score": 0,
69
- "max_score": 1
70
- },
71
- "score_details": {
72
- "score": 0.25838926174496646
73
- }
74
- },
75
- {
76
- "evaluation_name": "MUSR",
77
- "metric_config": {
78
- "evaluation_description": "Accuracy on MUSR",
79
- "lower_is_better": false,
80
- "score_type": "continuous",
81
- "min_score": 0,
82
- "max_score": 1
83
- },
84
- "score_details": {
85
- "score": 0.35092708333333333
86
- }
87
- },
88
- {
89
- "evaluation_name": "MMLU-PRO",
90
- "metric_config": {
91
- "evaluation_description": "Accuracy on MMLU-PRO",
92
- "lower_is_better": false,
93
- "score_type": "continuous",
94
- "min_score": 0,
95
- "max_score": 1
96
- },
97
- "score_details": {
98
- "score": 0.2732712765957447
99
- }
100
- }
101
- ],
102
- "additional_details": {
103
- "precision": "bfloat16",
104
- "architecture": "LlamaForCausalLM",
105
- "params_billions": 6.91
106
- }
107
- }
leaderboard_data/HFOpenLLMv2/AI-Sweden-Models/AI-Sweden-Models_Llama-3-8B-instruct/1d68bd2e-de6e-4327-a8f1-33322eba537e.json DELETED
@@ -1,107 +0,0 @@
- {
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AI-Sweden-Models_Llama-3-8B-instruct/1762652579.474785",
- "retrieved_timestamp": "1762652579.474786",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AI-Sweden-Models/Llama-3-8B-instruct",
- "developer": "AI-Sweden-Models",
- "inference_platform": "unknown",
- "id": "AI-Sweden-Models/Llama-3-8B-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24012841482821137
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4173460154515302
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47709375000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25972406914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
- }
leaderboard_data/HFOpenLLMv2/AI4free/AI4free_Dhanishtha/a554a3eb-943c-4135-966b-929129ef025d.json DELETED
@@ -1,107 +0,0 @@
- {
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AI4free_Dhanishtha/1762652579.475332",
- "retrieved_timestamp": "1762652579.475332",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AI4free/Dhanishtha",
- "developer": "AI4free",
- "inference_platform": "unknown",
- "id": "AI4free/Dhanishtha"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2451240486353985
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34039444943326375
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25604229607250756
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2525167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35694791666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16431183510638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
- }
leaderboard_data/HFOpenLLMv2/AI4free/AI4free_t2/332ccdb5-faf5-47c6-afeb-a91d2148adf0.json DELETED
@@ -1,107 +0,0 @@
- {
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AI4free_t2/1762652579.475577",
- "retrieved_timestamp": "1762652579.475578",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AI4free/t2",
- "developer": "AI4free",
- "inference_platform": "unknown",
- "id": "AI4free/t2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3866828902866616
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2910111436321769
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18957703927492447
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3846354166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11436170212765957
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
- }
leaderboard_data/HFOpenLLMv2/AIDC-AI/AIDC-AI_Marco-o1/17f7398f-675d-4b38-b233-64fc106737c3.json DELETED
@@ -1,107 +0,0 @@
- {
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AIDC-AI_Marco-o1/1762652579.47579",
- "retrieved_timestamp": "1762652579.4757912",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AIDC-AI/Marco-o1",
- "developer": "AIDC-AI",
- "inference_platform": "unknown",
- "id": "AIDC-AI/Marco-o1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.477083028586373
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5364362696398749
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37462235649546827
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41384375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41165226063829785
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
- }
leaderboard_data/HFOpenLLMv2/Aashraf995/Aashraf995_Creative-7B-nerd/7ea9f4db-5b52-40a5-904e-785e43302934.json DELETED
@@ -1,107 +0,0 @@
- {
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Aashraf995_Creative-7B-nerd/1762652579.476046",
- "retrieved_timestamp": "1762652579.476046",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Aashraf995/Creative-7B-nerd",
- "developer": "Aashraf995",
- "inference_platform": "unknown",
- "id": "Aashraf995/Creative-7B-nerd"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4721871301480073
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5606785565640195
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3164652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3263422818791946
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4515416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44921875
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
- }
leaderboard_data/HFOpenLLMv2/AbacusResearch/AbacusResearch_Jallabi-34B/76397277-901a-4ad0-9dae-0351ca875ec6.json DELETED
@@ -1,107 +0,0 @@
- {
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AbacusResearch_Jallabi-34B/1762652579.477037",
- "retrieved_timestamp": "1762652579.4770381",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AbacusResearch/Jallabi-34B",
- "developer": "AbacusResearch",
- "inference_platform": "unknown",
- "id": "AbacusResearch/Jallabi-34B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3528604103777976
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6023380603196266
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05211480362537765
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3389261744966443
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48217708333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4681682180851064
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
- }
leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_StructuredThinker-v0.3-MoreStructure/81a5aafb-2cf7-490d-b619-ce638fcc8b38.json DELETED
@@ -1,107 +0,0 @@
- {
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Ahdoot_StructuredThinker-v0.3-MoreStructure/1762652579.4772868",
- "retrieved_timestamp": "1762652579.477288",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Ahdoot/StructuredThinker-v0.3-MoreStructure",
- "developer": "Ahdoot",
- "inference_platform": "unknown",
- "id": "Ahdoot/StructuredThinker-v0.3-MoreStructure"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4192808415005519
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48376906494893984
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.290785498489426
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41582291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36103723404255317
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
- }
leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_Test_StealthThinker/43c907eb-3e43-47ff-b38d-f912ba6ef46c.json DELETED
@@ -1,107 +0,0 @@
- {
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Ahdoot_Test_StealthThinker/1762652579.4775438",
- "retrieved_timestamp": "1762652579.4775438",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Ahdoot/Test_StealthThinker",
- "developer": "Ahdoot",
- "inference_platform": "unknown",
- "id": "Ahdoot/Test_StealthThinker"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42200361706937595
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46466398134666304
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17900302114803626
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42804166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35970744680851063
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
- }
leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0-Coder/48732edf-8baf-438e-8a5c-763eee6c0c18.json DELETED
@@ -1,107 +0,0 @@
- {
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V0-Coder/1762652579.478028",
- "retrieved_timestamp": "1762652579.478029",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder",
- "developer": "AicoresSecurity",
- "inference_platform": "unknown",
- "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7097656440466851
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4477501104993749
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1487915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34079166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3178191489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
- }
leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0/38f169f0-e939-4b12-8f78-b2a27fb90de0.json DELETED
@@ -1,107 +0,0 @@
- {
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V0/1762652579.4777558",
- "retrieved_timestamp": "1762652579.477757",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V0",
- "developer": "AicoresSecurity",
- "inference_platform": "unknown",
- "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6358018945287394
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4497434194912941
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11555891238670694
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33136458333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.301030585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
- }
leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V1.1/e8c63728-a1f5-432f-bf9f-204b0f4041aa.json DELETED
@@ -1,107 +0,0 @@
- {
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V1.1/1762652579.478466",
- "retrieved_timestamp": "1762652579.478467",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V1.1",
- "developer": "AicoresSecurity",
- "inference_platform": "unknown",
- "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6730209178313542
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4391775517124728
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17598187311178248
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35409375000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.308843085106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
- }